0.1 Импорт

Импорт phyloseq объекта и библиотек
Датасет - хроносерия разложения соломы - от фактор Day 10ти уровней

library(phyloseq)
library(tidyverse)
library(ggpubr)
library(ampvis2)
library(ANCOMBC)
library(heatmaply)
library(compositions)
library(igraph)
library(WGCNA)
require(DESeq2)
require(phyloseq)

ps.f <- readRDS("psf2")

0.2 functions

#phyloseq object to ampvis2 object
#https://gist.github.com/KasperSkytte/8d0ca4206a66be7ff6d76fc4ab8e66c6

phyloseq_to_ampvis2 <- function(physeq) {
  #check object for class
  if(!any(class(physeq) %in% "phyloseq"))
    stop("physeq object must be of class \"phyloseq\"", call. = FALSE)
  
  #ampvis2 requires taxonomy and abundance table, phyloseq checks for the latter
  if(is.null(physeq@tax_table))
    stop("No taxonomy found in the phyloseq object and is required for ampvis2", call. = FALSE)
  
  #OTUs must be in rows, not columns
  if(phyloseq::taxa_are_rows(physeq))
    abund <- as.data.frame(phyloseq::otu_table(physeq)@.Data)
  else
    abund <- as.data.frame(t(phyloseq::otu_table(physeq)@.Data))
  
  #tax_table is assumed to have OTUs in rows too
  tax <- phyloseq::tax_table(physeq)@.Data
  
  #merge by rownames (OTUs)
  otutable <- merge(
    abund,
    tax,
    by = 0,
    all.x = TRUE,
    all.y = FALSE,
    sort = FALSE
  )
  colnames(otutable)[1] <- "OTU"
  
  #extract sample_data (metadata)
  if(!is.null(physeq@sam_data)) {
    metadata <- data.frame(
      phyloseq::sample_data(physeq),
      row.names = phyloseq::sample_names(physeq), 
      stringsAsFactors = FALSE, 
      check.names = FALSE
    )
    
    #check if any columns match exactly with rownames
    #if none matched assume row names are sample identifiers
    samplesCol <- unlist(lapply(metadata, function(x) {
      identical(x, rownames(metadata))}))
    
    if(any(samplesCol)) {
      #error if a column matched and it's not the first
      if(!samplesCol[[1]])
        stop("Sample ID's must be in the first column in the sample metadata, please reorder", call. = FALSE)
    } else {
      #assume rownames are sample identifiers, merge at the end with name "SampleID"
      if(any(colnames(metadata) %in% "SampleID"))
        stop("A column in the sample metadata is already named \"SampleID\" but does not seem to contain sample ID's", call. = FALSE)
      metadata$SampleID <- rownames(metadata)
      
      #reorder columns so SampleID is the first
      metadata <- metadata[, c(which(colnames(metadata) %in% "SampleID"), 1:(ncol(metadata)-1L)), drop = FALSE]
    }
  } else
    metadata <- NULL
  
  #extract phylogenetic tree, assumed to be of class "phylo"
  if(!is.null(physeq@phy_tree)) {
    tree <- phyloseq::phy_tree(physeq)
  } else
    tree <- NULL
  
  #extract OTU DNA sequences, assumed to be of class "XStringSet"
  if(!is.null(physeq@refseq)) {
    #convert XStringSet to DNAbin using a temporary file (easiest)
    fastaTempFile <- tempfile(pattern = "ampvis2_", fileext = ".fa")
    Biostrings::writeXStringSet(physeq@refseq, filepath = fastaTempFile)
  } else
    fastaTempFile <- NULL
  
  #load as normally with amp_load
  ampvis2::amp_load(
    otutable = otutable,
    metadata = metadata,
    tree = tree,
    fasta = fastaTempFile
  )
}

#variance stabilisation from DESeq2

ps_vst <- function(ps, factor){
  

  
  diagdds = phyloseq_to_deseq2(ps, as.formula(paste( "~", factor)))              
  diagdds = estimateSizeFactors(diagdds, type="poscounts")
  diagdds = estimateDispersions(diagdds, fitType = "local") 
  pst <- varianceStabilizingTransformation(diagdds)
  pst.dimmed <- t(as.matrix(assay(pst))) 
  # pst.dimmed[pst.dimmed < 0.0] <- 0.0
  ps.varstab <- ps
  otu_table(ps.varstab) <- otu_table(pst.dimmed, taxa_are_rows = FALSE) 
  return(ps.varstab)
}

#WGCNA visualisation
#result - list class object with attributes:
# ps - phyloseq object
# amp - ampwis2 object
# heat - heatmap with absalute read numbers
# heat_rel - hetmap with relative abundances
# tree - phylogenetic tree with taxonomy

color_filt <- function(ps, df){
  
      library(tidyverse)
      library(reshape2)
      library(gridExtra)

  
  l = list()
  for (i in levels(df$module)){
    message(i)
    color_name <-  df %>% 
      filter(module == i) %>% 
      pull(asv) %>% 
      unique()
    ps.col <- prune_taxa(color_name, ps)
    amp.col <- phyloseq_to_ampvis2(ps.col)
    heat <- amp_heatmap(amp.col, tax_show = 60, 
              group_by = "Day", 
              tax_aggregate = "OTU",
              tax_add = "Genus", 
              normalise=FALSE, 
              showRemainingTaxa = TRUE)
    
    ps.rel  <-  phyloseq::transform_sample_counts(ps.col, function(x) x / sum(x) * 100)
    amp.r <- phyloseq_to_ampvis2(ps.rel)
    heat.rel <- amp_heatmap(amp.r, tax_show = 60, 
          group_by = "Day", 
          tax_aggregate = "OTU",
          tax_add = "Genus", 
          normalise=FALSE, 
          showRemainingTaxa = TRUE)
    
    tree <- ps.col@phy_tree
    taxa <- as.data.frame(ps.col@tax_table@.Data)
    p1 <- ggtree(tree) + 
              geom_tiplab(size=2, align=TRUE, linesize=.5) +
              theme_tree2()
    
    taxa[taxa == "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"] <- "Allorhizobium"
    taxa[taxa == "Burkholderia-Caballeronia-Paraburkholderia"] <- "Burkholderia"
  
    tx <- taxa %>% 
      rownames_to_column("id") %>% 
      mutate(id = factor(id, levels = rev(get_taxa_name(p1)))) %>% 
      dplyr::select(-c(Kingdom, Species, Order)) %>% 
      melt(id.var = 'id')

    p2 <- ggplot(tx, aes(variable, id)) + 
      geom_tile(aes(fill = value), alpha = 0.4) +
      geom_text(aes(label = value), size = 3) +
      theme_bw() + 
      theme(legend.position = "none",
             axis.ticks.x=element_blank(),
             axis.text.y=element_blank(),
             axis.ticks.y=element_blank(),
             axis.title.x = element_blank(),
            axis.title.y = element_blank()) 

    p <- ggpubr::ggarrange(p1, p2, widths = c(0.9, 1))
    l[[i]] <- list("ps" = ps.col, 
                   "amp" = amp.col,
                   "heat" = heat,
                   "heat_rel" = heat.rel,
                   "tree" = p,
                   "taxa" = knitr::kable(taxa))
                    
  }
  return(l)
}


color_filt_broken <- function(ps, df, ps.pruned){
  
      library(tidyverse)
      library(reshape2)
      library(gridExtra)

  
  l = list()
  for (i in levels(df$module)){
    message(i)
    color_name <-  df %>% 
      filter(module == i) %>% 
      pull(asv) %>% 
      unique()
    ps.col <- prune_taxa(color_name, ps)
    ps.col.pruned <- prune_taxa(color_name, ps.pruned)
    amp.col <- phyloseq_to_ampvis2(ps.col)
    heat <- amp_heatmap(amp.col, tax_show = 60, 
              group_by = "Day", 
              tax_aggregate = "OTU",
              tax_add = "Genus", 
              normalise=FALSE, 
              showRemainingTaxa = TRUE)
    
    ps.rel  <-  phyloseq::transform_sample_counts(ps.col, function(x) x / sum(x) * 100)
    amp.r <- phyloseq_to_ampvis2(ps.rel)
    heat.rel <- amp_heatmap(amp.r, tax_show = 60, 
          group_by = "Day", 
          tax_aggregate = "OTU",
          tax_add = "Genus", 
          normalise=FALSE, 
          showRemainingTaxa = TRUE)
    
    tree <- ps.col.pruned@phy_tree
    taxa <- as.data.frame(ps.col.pruned@tax_table@.Data)
    p1 <- ggtree(tree) + 
              geom_tiplab(size=2, align=TRUE, linesize=.5) +
              theme_tree2()
    
    taxa[taxa == "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"] <- "Allorhizobium"
    taxa[taxa == "Burkholderia-Caballeronia-Paraburkholderia"] <- "Burkholderia"
  
    tx <- taxa %>% 
      rownames_to_column("id") %>% 
      mutate(id = factor(id, levels = rev(get_taxa_name(p1)))) %>% 
      dplyr::select(-c(Kingdom, Species, Order)) %>% 
      melt(id.var = 'id')

    p2 <- ggplot(tx, aes(variable, id)) + 
      geom_tile(aes(fill = value), alpha = 0.4) +
      geom_text(aes(label = value), size = 3) +
      theme_bw() + 
      theme(legend.position = "none",
             axis.ticks.x=element_blank(),
             axis.text.y=element_blank(),
             axis.ticks.y=element_blank(),
             axis.title.x = element_blank(),
            axis.title.y = element_blank()) 

    p <- ggpubr::ggarrange(p1, p2, widths = c(0.9, 1))
    l[[i]] <- list("ps" = ps.col, 
                   "amp" = amp.col,
                   "heat" = heat,
                   "heat_rel" = heat.rel,
                   "tree" = p,
                   "taxa" = knitr::kable(taxa))
                    
  }
  return(l)
}

detachAllPackages <- function() {

  basic.packages <- c("package:stats","package:graphics","package:grDevices","package:utils","package:datasets","package:methods","package:base")

  package.list <- search()[ifelse(unlist(gregexpr("package:",search()))==1,TRUE,FALSE)]

  package.list <- setdiff(package.list,basic.packages)

  if (length(package.list)>0)  for (package in package.list) detach(package, character.only=TRUE, force = TRUE)

}

plot_alpha_w_toc <- function(ps, group, metric) {
  
  require(phyloseq)
  require(ggplot2)
  
  ps_a <- prune_taxa(taxa_sums(ps) > 0, ps)
  
  er <- estimate_richness(ps_a)
  df_er <- cbind(ps_a@sam_data, er)
  df_er <- df_er %>% select(c(group, metric))
  stat.test <- aov(as.formula(paste0(metric, "~", group)), data = df_er) %>%
    rstatix::tukey_hsd()
  y <-  seq(max(er[[metric]]), length=length(stat.test$p.adj), by=max(er[[metric]]/20))

  plot_richness(ps_a, x=group, measures=metric) + 
    geom_boxplot() +
    geom_point(size=1.2, alpha=0.3) +
    ggpubr::stat_pvalue_manual(
      stat.test, 
      label = "p.adj.signif", 
      y.position = y) +
    theme_light() + 
    scale_color_brewer(palette="Dark2") +
    theme(axis.text.x = element_text(angle = 90),
          axis.title.x=element_blank()) +
    labs(y=paste(metric, "index")) 
}


# standart NMDS plot tool frop phyloseq with some additional aestatics 
# have stress value on plot - may work as fuck
beta_custom_norm_NMDS_elli_w <- function(ps, seed = 7888, normtype="vst", Color="What", Group="Repeat"){
  require(phyloseq)
  require(ggplot2)
  require(ggpubr)
  library(ggforce)
  
  
  ordination.b <- ordinate(ps, "NMDS", "bray")
  mds <- as.data.frame(ordination.b$points)
  p  <-  plot_ordination(ps,
                         ordination.b,
                         type="sample",
                         color = Color,
                         title="NMDS - Bray-Curtis",
                         # title=NULL,
                         axes = c(1,2) ) + 
    theme_bw() + 
    theme(text = element_text(size = 10)) + 
    geom_point(size = 3) +
    annotate("text",
    x=min(mds$MDS1) + abs(min(mds$MDS1))/7,
    y=max(mds$MDS2),
    label=paste0("Stress -- ", round(ordination.b$stress, 3))) +
    geom_mark_ellipse(aes_string(group = Group, label = Group),
                      label.fontsize = 10,
                      label.buffer = unit(2, "mm"),
                      label.minwidth = unit(5, "mm"),
                      con.cap = unit(0.1, "mm"),
                      con.colour='gray') +
    theme(legend.position = "none") +
    ggplot2::scale_fill_viridis_c(option = "H")
  
  return(p)
}


# alpha with aov + tukie post-hock - useless, but it looks pretty good
plot_alpha_w_toc <- function(ps, group, metric) {
  
  require(phyloseq)
  require(ggplot2)
  
  ps_a <- prune_taxa(taxa_sums(ps) > 0, ps)
  
  er <- estimate_richness(ps_a)
  df_er <- cbind(ps_a@sam_data, er)
  df_er <- df_er %>% select(c(group, metric))
  stat.test <- aov(as.formula(paste0(metric, "~", group)), data = df_er) %>%
    rstatix::tukey_hsd()
  y <- seq(max(er[[metric]]), length=length(stat.test$p.adj.signif[stat.test$p.adj.signif != "ns"]), by=max(er[[metric]]/20))
  
  plot_richness(ps_a, x=group, measures=metric) + 
    geom_boxplot() +
    geom_point(size=1.2, alpha=0.3) +
    stat_pvalue_manual(
      stat.test, 
      label = "p.adj.signif", 
      y.position = y,
      hide.ns=TRUE) +
    theme_light() + 
    scale_color_brewer(palette="Dark2") +
    theme(axis.text.x = element_text(angle = 90),
          axis.title.x=element_blank()) +
    labs(y=paste(metric, "index")) 
}

0.2.1 modify metadata


Add Group parameter to metadata -
- early - D01, D03, D05
- middle - D07, D08, D10
- late - D13, D14, D15

sample.data <- ps.f@sam_data %>% 
  data.frame() %>% 
  mutate(Group = if_else(Day %in% c("D01", "D03", "D05"), "early", 
                         if_else(Day %in% c("D07", "D08","D10"), "middle", "late"))) %>% 
  rename('Day' = 'Bag') %>% 
  mutate(Group = factor(Group, levels=c("early", "middle","late"))) %>% 
  mutate(Day = Bag %>% 
           forcats::fct_recode( "3" = "D01",
                                "14" = "D03",
                                "28" = "D05",
                                "49" = "D07" ,
                                "63" = "D08",
                                "91" = "D10",
                                "119" = "D12",
                                "140" = "D13",
                                "161" = "D14",
                                "182" = "D15")
         ) %>% 
  phyloseq::sample_data()

sample_data(ps.f) <- sample.data

0.3 Alpha

p1 <- plot_alpha_w_toc(ps = ps.f, group = "Day", metric = "Observed")
p2 <- plot_alpha_w_toc(ps = ps.f, group = "Day", metric = "Shannon")
p3 <- plot_alpha_w_toc(ps = ps.f, group = "Day", metric = "InvSimpson")
ggarrange(p1, p2, p3, nrow = 1)

p.observed <- plot_alpha_w_toc(ps = ps.f, group = "Group", metric = c("Observed")) + 
  theme(axis.title.y = element_blank())

p.shannon <- plot_alpha_w_toc(ps = ps.f, group = "Group", metric = c("Shannon")) + 
  theme(axis.title.y = element_blank())

p.simpson <- plot_alpha_w_toc(ps = ps.f, group = "Group", metric = c("InvSimpson")) + 
  theme(axis.title.y = element_blank())

ggpubr::ggarrange(p.observed, p.shannon, p.simpson, ncol = 3)

0.3.0.1 mpd

mpd - индекс альфа-разнообразия “лохматости дерева” - считается отдельно т.к. есть в отдельном пакете со встроенной достоверностью(на пермутациях)
вот ссылка на значение в колонках:
https://www.rdocumentation.org/packages/picante/versions/1.8.2/topics/ses.mpd
в общем early stage достоверно менее разнообразна, чем остальные стадии

physeq_merged <- merge_samples(ps.f, "Group", fun=sum)
# ps.f@sam_data

# picante::mpd(l_vst$blue$ps@otu_table@.Data, cophenetic(l_vst$blue$ps@phy_tree)) %>% 
#   mean(na.rm = TRUE)
# 
# picante::mpd(l_vst$salmon$ps@otu_table@.Data, cophenetic(l_vst$salmon$ps@phy_tree)) %>% 
#   mean(na.rm = TRUE)

mpd.res <- picante::ses.mpd(physeq_merged@otu_table@.Data, cophenetic(physeq_merged@phy_tree)) 

as.data.frame(mpd.res)
##        ntaxa  mpd.obs mpd.rand.mean mpd.rand.sd mpd.obs.rank  mpd.obs.z
## early    364 1.742096      1.866758  0.02833040            1 -4.4002687
## middle   490 1.839665      1.863394  0.02359817          155 -1.0055407
## late     822 1.877129      1.863861  0.01332567          842  0.9956649
##        mpd.obs.p runs
## early      0.001  999
## middle     0.155  999
## late       0.842  999

0.4 beta statistic

0.4.1 permanova

permanova - group significantlly different - dispersion between is more, than inside groups

dist <- phyloseq::distance(ps.f, "bray")
metadata <- as(sample_data(ps.f@sam_data), "data.frame")
vegan::adonis2(dist ~ Group, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = dist ~ Group, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## Group     2   3.2742 0.33893 8.2033  0.001 ***
## Residual 32   6.3861 0.66107                  
## Total    34   9.6604 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

0.4.2 dynamic of beta diversity

Дистанция брей кертиса по шагам между каждыми днями.   (Например - все D15 против D14)   Может на эту картинку подвесить еще почвенное дыхание?

ps.f.r <- rarefy_even_depth(ps.f, rngseed = 777)

avg.r <- ps.f.r@otu_table %>% 
  as.data.frame() %>%
  vegan::avgdist(10)

avg <- ps.f@otu_table %>% 
  as.data.frame() %>%
  vegan::avgdist(10)

# avg %>%
#   as.matrix() %>%
#   as_tibble(rownames= "sample") %>%
#   pivot_longer(-sample) %>%
#   filter(sample < name) %>%
#   mutate(repeat_a = str_replace(sample, ".*-", ""),
#          repeat_b = str_replace(name, ".*-", ""), 
#          day_a = as.numeric(str_replace(sapply(strsplit(sample, "-"), `[`, 3), "D", "")),
#           day_b = as.numeric(str_replace(sapply(strsplit(name, "-"), `[`, 3), "D", "")),
#          diff = abs(day_a - day_b),
#          early = day_a < 10) %>% 
#   filter(repeat_a == repeat_b & diff < 10) %>%
#   group_by(diff, repeat_a, early) %>%
#   summarize(median = median(value)) %>%
#   ungroup() %>%
#   ggplot(aes(x=diff, y=median, color=early, group=paste0(repeat_a, early))) +
#   geom_line(size=0.25) +
#   geom_smooth(aes(group=early), se=FALSE, size=4) +
#   labs(x="Distance between time points",
#        y="Median Bray-Curtis distance") +
#   scale_x_continuous(breaks=1:9) +
#   scale_color_manual(name=NULL,
#                      breaks=c(TRUE, FALSE),
#                      values=c("blue", "red"),
#                      labels=c("Early", "Late")) +
#   guides(color = guide_legend(override.aes = list(size=1))) +
#   theme_classic()

avg.r %>%
  as.matrix() %>%
  as_tibble(rownames= "sample") %>%
  pivot_longer(-sample) %>%
  filter(sample < name) %>%
  mutate(repeat_a = str_replace(sample, ".*-", ""),
         repeat_b = str_replace(name, ".*-", ""), 
         day_a = as.numeric(str_replace(sapply(strsplit(sample, "-"), `[`, 3), "D", "")),
          day_b = as.numeric(str_replace(sapply(strsplit(name, "-"), `[`, 3), "D", ""))) %>% 
  mutate(day_a = as.numeric(as.factor(day_a) %>% forcats::fct_recode("2" = "3", "3" = "5", "4" = "7", "5" = "8", "6" = "10", "7" = "13", "8" = "14", "9" = "14", "10" = "15")),
         day_b = as.numeric(as.factor(day_b) %>% forcats::fct_recode("2" = "3", "3" = "5", "4" = "7", "5" = "8", "6" = "10", "7" = "12", "8" = "13", "9" = "14", "10" = "15")),
         diff = abs(day_a - day_b)) %>% 
  filter(diff == 1) %>% 
  mutate(day_b = as.factor(day_b) %>% forcats::fct_recode("D03" = "2", "D05" = "3","D07" = "4",  "D08" = "5", "D10" = "6", "D13" = "7", "D14" = "8", "D15" = "10", "D12" = "7")) %>% 
  ggplot(aes(x=day_b, y=value)) +
    geom_boxplot() +
  theme_bw() +
  labs(x="Time points",
       y="Bray-Curtis distance")

0.4.3 basic NMDS plot

По картинке кажется что происходит постепенное замедление изменений -
но из предыдущей картинки следует, что это не так - есть провал между 7-8-10,
но в среднем точки различаются довольно одинакого.

beta_custom_norm_NMDS_elli_w(ps.f, C="Group", G="Day")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1048129 
## Run 1 stress 0.1372731 
## Run 2 stress 0.1318495 
## Run 3 stress 0.1048129 
## ... Procrustes: rmse 0.00004856956  max resid 0.0001949092 
## ... Similar to previous best
## Run 4 stress 0.1048129 
## ... New best solution
## ... Procrustes: rmse 0.00001842645  max resid 0.00006948497 
## ... Similar to previous best
## Run 5 stress 0.1426236 
## Run 6 stress 0.1048204 
## ... Procrustes: rmse 0.001770292  max resid 0.007527799 
## ... Similar to previous best
## Run 7 stress 0.1064532 
## Run 8 stress 0.1064733 
## Run 9 stress 0.1048204 
## ... Procrustes: rmse 0.001767644  max resid 0.007510715 
## ... Similar to previous best
## Run 10 stress 0.1064532 
## Run 11 stress 0.1318495 
## Run 12 stress 0.1064733 
## Run 13 stress 0.1064733 
## Run 14 stress 0.1460032 
## Run 15 stress 0.1048204 
## ... Procrustes: rmse 0.001769848  max resid 0.007525048 
## ... Similar to previous best
## Run 16 stress 0.1064532 
## Run 17 stress 0.1372728 
## Run 18 stress 0.1048129 
## ... Procrustes: rmse 0.000002711789  max resid 0.0000100797 
## ... Similar to previous best
## Run 19 stress 0.1460029 
## Run 20 stress 0.146003 
## *** Solution reached

0.5 Splitting the dataset

0.5.1 Basic information about dataset

amp <- phyloseq_to_ampvis2(ps.f)
amp
## ampvis2 object with 5 elements. 
## Summary of OTU table:
##      Samples         OTUs  Total#Reads    Min#Reads    Max#Reads Median#Reads 
##           35         1245       521643         4412        33561        12894 
##    Avg#Reads 
##     14904.09 
## 
## Assigned taxonomy:
##      Kingdom       Phylum        Class        Order       Family        Genus 
##   1245(100%)   1245(100%) 1233(99.04%)  1184(95.1%) 1063(85.38%)  736(59.12%) 
##      Species 
##    90(7.23%) 
## 
## Metadata variables: 5 
##  SampleID, Bag, Description, Group, Day

Разделим датасет на две группы

1-я - в более чем 10% образцов должно быть хотя бы 10 ридов - эта группа пойдет в анализ далее

ps.inall <- phyloseq::filter_taxa(ps.f, function(x) sum(x > 10) > (0.1*length(x)), TRUE)

amp.inall <- phyloseq_to_ampvis2(ps.inall)
amp_heatmap(amp.inall,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=FALSE,
            showRemainingTaxa = TRUE)

Вторая - оставшиеся, но более 100 ридов по всем образцам(далее - вылетающие мажоры(ВМ))
Остальные филотипы выкидываем из анализа

ps.exl  <- phyloseq::filter_taxa(ps.f, function(x) sum(x > 10) < (0.1*length(x)), TRUE)
ps.exl  <- prune_taxa(taxa_sums(ps.exl) > 100, ps.exl)
amp.exl <- phyloseq_to_ampvis2(ps.exl)
amp_heatmap(amp.exl,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=FALSE,
            showRemainingTaxa = TRUE)

То же, но c относительной представленностью.

ps.per  <-  phyloseq::transform_sample_counts(ps.f, function(x) x / sum(x) * 100) 
ps.exl.taxa <- taxa_names(ps.exl)
ps.per.exl <- prune_taxa(ps.exl.taxa, ps.per)
amp.exl.r <- phyloseq_to_ampvis2(ps.per.exl)

amp_heatmap(amp.exl.r, tax_show = 60, 
      group_by = "Day", 
      tax_aggregate = "OTU",
      tax_add = "Genus", 
      round =  2, 
      normalise=FALSE, 
      showRemainingTaxa = TRUE)

amp_heatmap(amp.exl.r, tax_show = 60, 
      group_by = "Day", 
      tax_aggregate = "OTU",
      tax_add = "Genus", 
      round =  2, 
      normalise=FALSE, )

ВМ объединенные по родам. Относительная представленность.

amp_heatmap(amp.exl.r,
            tax_show = 30, 
            group_by = "Day", 
            tax_aggregate = "Genus",
            tax_add = "Phylum",
            tax_class = "Proteobacteria",
            round =  2,
            normalise=FALSE, 
            showRemainingTaxa = TRUE)

Если посмотреть, если как распределяются ВМ по дням - похоже распределение ВМ связано не только с особенностью отдельных мешочков, но и с чисто техническими особенностями - вылетающие значения вылетают в основном не с биологическими повторностями, а с техническими(красная линия - выбрана визуально)

p_box <- phyloseq::sample_sums(ps.per.exl) %>% 
  as.data.frame(col.names = "values") %>% 
  setNames(., nm = "values") %>% 
  rownames_to_column("samples") %>% 
  mutate(Day = sapply(strsplit(samples, "-"), `[`, 3)) %>% 
  ggplot(aes(x=Day, y=values, color=Day, fill = Day)) +
  geom_boxplot(aes(color=Day, fill = Day)) +
  geom_point(color = "black", position = position_dodge(width=0.2)) +
  geom_hline(yintercept = 10, colour = "red") +
  theme_bw() +
  theme(legend.position = "none")
     
p_box <- p_box + viridis::scale_color_viridis(option = "H", discrete = TRUE, direction=1, begin=0.1, end = 0.9, alpha = 0.5)

p_box + viridis::scale_fill_viridis(option = "H", discrete = TRUE, direction=1, begin=0.1, end = 0.9, alpha = 0.3)

0.6 WGCNA

0.6.1 clr

после clr нормализации / выглядит отвратительно попробуем нормализацию vst из DESeq2

otu.inall <- as.data.frame(ps.inall@otu_table@.Data) 
ps.inall.clr <- ps.inall
otu_table(ps.inall.clr) <- phyloseq::otu_table(compositions::clr(otu.inall), taxa_are_rows = FALSE)
data <- ps.inall.clr@otu_table@.Data %>% 
  as.data.frame()
rownames(data) <- as.character(ps.inall.clr@sam_data$Description)
powers <-  c(c(1:10), seq(from = 12, to=30, by=1))
sft <-  pickSoftThreshold(data, powerVector = powers, verbose = 5, networkType = "signed")
## pickSoftThreshold: will use block size 338.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 338 of 338
## Warning: executing %dopar% sequentially: no parallel backend registered
##    Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k.  max.k.
## 1      1    0.185  23.60         0.8790 168.0000 168.00000 172.000
## 2      2    0.199 -15.50         0.8970  87.6000  87.20000  95.100
## 3      3    0.518 -13.10         0.8580  47.3000  46.80000  55.900
## 4      4    0.390  -6.46         0.8050  26.5000  26.00000  34.500
## 5      5    0.262  -3.46         0.7820  15.4000  14.90000  22.300
## 6      6    0.232  -2.30         0.7770   9.2300   8.78000  15.000
## 7      7    0.251  -1.87         0.6990   5.7300   5.36000  10.500
## 8      8    0.319  -1.75         0.7570   3.6800   3.41000   7.620
## 9      9    0.527  -2.07         0.7770   2.4400   2.20000   5.890
## 10    10    0.648  -2.28         0.8490   1.6600   1.47000   4.690
## 11    12    0.827  -2.41         0.8970   0.8420   0.69300   3.190
## 12    13    0.793  -2.38         0.7930   0.6220   0.49000   2.700
## 13    14    0.277  -3.97         0.0914   0.4690   0.35500   2.320
## 14    15    0.282  -3.83         0.1000   0.3610   0.26100   2.010
## 15    16    0.894  -2.17         0.9030   0.2820   0.19800   1.770
## 16    17    0.905  -2.10         0.9080   0.2240   0.15000   1.560
## 17    18    0.922  -2.04         0.9240   0.1810   0.11500   1.390
## 18    19    0.927  -2.01         0.9230   0.1480   0.08820   1.250
## 19    20    0.938  -1.93         0.9370   0.1220   0.06780   1.130
## 20    21    0.946  -1.86         0.9470   0.1020   0.05260   1.020
## 21    22    0.315  -3.12         0.2220   0.0856   0.04140   0.926
## 22    23    0.303  -3.96         0.1560   0.0728   0.03270   0.844
## 23    24    0.307  -2.92         0.1940   0.0623   0.02590   0.772
## 24    25    0.303  -2.84         0.1800   0.0538   0.02060   0.708
## 25    26    0.921  -1.63         0.8990   0.0467   0.01650   0.657
## 26    27    0.892  -1.61         0.8650   0.0408   0.01320   0.622
## 27    28    0.155  -1.93        -0.0202   0.0359   0.01040   0.591
## 28    29    0.199  -2.77        -0.0228   0.0317   0.00841   0.563
## 29    30    0.199  -2.69        -0.0203   0.0282   0.00684   0.537
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

0.6.2 vst

после vst нормализации

ps.varstab <- ps_vst(ps.inall, "Day")

data2 <- ps.varstab@otu_table@.Data %>% 
  as.data.frame()

rownames(data2) <- as.character(ps.varstab@sam_data$Description)
powers <-  c(seq(from = 1, to=10, by=0.5), seq(from = 11, to=20, by=1))
sft2 <-  pickSoftThreshold(data2, powerVector = powers, verbose = 5, networkType = "signed hybrid")
## pickSoftThreshold: will use block size 338.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 338 of 338
##    Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1    1.0    0.409 -1.05          0.704  50.100  45.60000  97.80
## 2    1.5    0.578 -1.07          0.895  30.900  27.00000  71.20
## 3    2.0    0.723 -1.15          0.942  20.300  17.90000  53.60
## 4    2.5    0.767 -1.24          0.871  14.000  12.00000  41.70
## 5    3.0    0.809 -1.31          0.909  10.000   7.86000  33.40
## 6    3.5    0.872 -1.31          0.958   7.380   5.39000  27.30
## 7    4.0    0.888 -1.30          0.968   5.580   3.76000  22.60
## 8    4.5    0.858 -1.36          0.910   4.320   2.67000  19.00
## 9    5.0    0.892 -1.36          0.943   3.400   1.94000  16.20
## 10   5.5    0.899 -1.38          0.954   2.720   1.41000  13.90
## 11   6.0    0.896 -1.39          0.962   2.210   1.07000  12.10
## 12   6.5    0.897 -1.33          0.962   1.820   0.84700  10.60
## 13   7.0    0.910 -1.30          0.970   1.520   0.65400   9.30
## 14   7.5    0.914 -1.31          0.966   1.280   0.51500   8.25
## 15   8.0    0.913 -1.28          0.954   1.090   0.41500   7.36
## 16   8.5    0.905 -1.28          0.937   0.938   0.34000   6.59
## 17   9.0    0.895 -1.30          0.913   0.813   0.27800   5.99
## 18   9.5    0.878 -1.33          0.900   0.709   0.23200   5.49
## 19  10.0    0.886 -1.34          0.894   0.624   0.19100   5.07
## 20  11.0    0.911 -1.31          0.928   0.491   0.12500   4.35
## 21  12.0    0.890 -1.29          0.894   0.396   0.08590   3.79
## 22  13.0    0.910 -1.26          0.918   0.325   0.06120   3.33
## 23  14.0    0.882 -1.25          0.856   0.272   0.04420   2.95
## 24  15.0    0.889 -1.19          0.864   0.230   0.03230   2.63
## 25  16.0    0.913 -1.18          0.893   0.198   0.02240   2.35
## 26  17.0    0.930 -1.16          0.912   0.172   0.01620   2.20
## 27  18.0    0.955 -1.19          0.947   0.151   0.01190   2.09
## 28  19.0    0.908 -1.21          0.896   0.134   0.00896   1.99
## 29  20.0    0.916 -1.21          0.906   0.120   0.00649   1.91
plot(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

# WGCNA, as old and not supported
detachAllPackages()
library(WGCNA)


net3 <- WGCNA::blockwiseModules(data2,
                          power=5.5,
                          TOMType="signed",
                          networkType="signed hybrid",
                          nThreads=0)


mergedColors2 <-  WGCNA::labels2colors(net3$colors, colorSeq = c("salmon", "darkgreen", "cyan", "red", "blue", "plum"))

plotDendroAndColors(
  net3$dendrograms[[1]],
  mergedColors2[net3$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05)

0.6.2.1 Импорт 2

library(phyloseq)
library(tidyverse)
library(ggpubr)
library(ampvis2)
library(heatmaply)
library(WGCNA)
library(phyloseq)
library(ggtree)
library(tidyverse)
library(KneeArrower)
modules_of_interest = mergedColors2 %>% 
  unique()

module_df <- data.frame(
  asv = names(net3$colors),
  colors = mergedColors2
)

# module_df[module_df == "yellow"] <- "salmon"

submod <-  module_df %>%
  subset(colors %in% modules_of_interest)

row.names(module_df) = module_df$asv

subexpr = as.data.frame(t(data2))[submod$asv,]

submod_df <- data.frame(subexpr) %>%
  mutate(
    asv = row.names(.)
  ) %>%
  pivot_longer(-asv) %>%
  mutate(
    module = module_df[asv,]$colors
  )

submod_df <- submod_df %>% 
  mutate(name = gsub("\\_.*","",submod_df$name)) %>% 
  group_by(name, asv) %>% 
  summarise(value = mean(value), asv = asv, module = module) %>% 
  relocate(c(asv, name, value, module)) %>% 
  ungroup() %>% 
  mutate(module = as.factor(module))

p <- submod_df %>% 
  ggplot(., aes(x=name, y=value, group=asv)) +
  geom_line(aes(color = module),
            alpha = 0.2) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none") +
  facet_grid(rows = vars(module)) +
  labs(x = "treatment",
       y = "normalized expression")

p + scale_color_manual(values = levels(submod_df$module))

0.6.2.2 ordination

UNIFRAC - колбаска сужается

ps.inall.col <- ps.inall
df <- module_df %>% 
  rename("id" = "asv")
df <- df %>% 
  dplyr::select(-"id") %>% 
  mutate(colors = as.factor(colors))
taxa <- as.data.frame(ps.inall@tax_table@.Data)
tx <- cbind(taxa, df)
tx$colors <- factor(tx$colors, levels = c("salmon", "darkgreen", "cyan", "red", "blue", "plum"))
tax_table(ps.inall.col) <- tax_table(as.matrix(tx))
ord <- ordinate(ps.inall.col, "NMDS", "unifrac")
## Run 0 stress 0.0685455 
## Run 1 stress 0.05568715 
## ... New best solution
## ... Procrustes: rmse 0.04076806  max resid 0.2160826 
## Run 2 stress 0.06784429 
## Run 3 stress 0.05628488 
## Run 4 stress 0.05568719 
## ... Procrustes: rmse 0.0001769095  max resid 0.0006062451 
## ... Similar to previous best
## Run 5 stress 0.07937976 
## Run 6 stress 0.07544596 
## Run 7 stress 0.05628489 
## Run 8 stress 0.05568719 
## ... Procrustes: rmse 0.00002559041  max resid 0.00008856774 
## ... Similar to previous best
## Run 9 stress 0.05568712 
## ... New best solution
## ... Procrustes: rmse 0.00002106782  max resid 0.00006934972 
## ... Similar to previous best
## Run 10 stress 0.06740137 
## Run 11 stress 0.06740136 
## Run 12 stress 0.06784421 
## Run 13 stress 0.06784436 
## Run 14 stress 0.06854546 
## Run 15 stress 0.07953205 
## Run 16 stress 0.05568719 
## ... Procrustes: rmse 0.00004255519  max resid 0.0001401031 
## ... Similar to previous best
## Run 17 stress 0.06740138 
## Run 18 stress 0.06728216 
## Run 19 stress 0.05568718 
## ... Procrustes: rmse 0.0000373923  max resid 0.0001312747 
## ... Similar to previous best
## Run 20 stress 0.07888487 
## *** Solution reached
plot_ordination(ps.inall.col, ord, type = "species", color = "colors") + 
  scale_color_manual(values =  c("salmon", "darkgreen", "cyan", "red", "blue", "plum")) +
  theme_bw() +
  theme(legend.position = "none")

Bray - колбаска равномерна
Поздние стадии слева - при этом ранние кластеры накладываются, поздние разделены Что влияет на ось2? Явно есть какой-то паттерн.

ord <- ordinate(ps.inall.col, "NMDS", "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.09036937 
## Run 1 stress 0.0903694 
## ... Procrustes: rmse 0.00001927814  max resid 0.00007086537 
## ... Similar to previous best
## Run 2 stress 0.09024752 
## ... New best solution
## ... Procrustes: rmse 0.003386048  max resid 0.01505777 
## Run 3 stress 0.09036938 
## ... Procrustes: rmse 0.003390695  max resid 0.01505826 
## Run 4 stress 0.09036937 
## ... Procrustes: rmse 0.003386035  max resid 0.01507802 
## Run 5 stress 0.09036937 
## ... Procrustes: rmse 0.003386432  max resid 0.01505862 
## Run 6 stress 0.1257473 
## Run 7 stress 0.09036937 
## ... Procrustes: rmse 0.003385691  max resid 0.01507976 
## Run 8 stress 0.1268919 
## Run 9 stress 0.09036944 
## ... Procrustes: rmse 0.003410952  max resid 0.01523599 
## Run 10 stress 0.09024752 
## ... New best solution
## ... Procrustes: rmse 0.00003844186  max resid 0.0001638862 
## ... Similar to previous best
## Run 11 stress 0.09036937 
## ... Procrustes: rmse 0.003380784  max resid 0.0150593 
## Run 12 stress 0.09036938 
## ... Procrustes: rmse 0.003379871  max resid 0.01508219 
## Run 13 stress 0.09036939 
## ... Procrustes: rmse 0.003385498  max resid 0.01512324 
## Run 14 stress 0.1358024 
## Run 15 stress 0.09024754 
## ... Procrustes: rmse 0.00003794727  max resid 0.0001665169 
## ... Similar to previous best
## Run 16 stress 0.1258672 
## Run 17 stress 0.1358024 
## Run 18 stress 0.09036937 
## ... Procrustes: rmse 0.003376715  max resid 0.01503468 
## Run 19 stress 0.126985 
## Run 20 stress 0.1360016 
## *** Solution reached
plot_ordination(ps.inall.col, ord, type = "species", color = "colors") + 
  scale_color_manual(values =  c("salmon", "darkgreen", "cyan", "red", "blue", "plum")) +
  theme_bw() +
  theme(legend.position = "none")

0.6.2.3 visualisation

Далее идут одинаковые картинки по всем группам.

l_vst <- color_filt(ps.inall, submod_df)
l_vst

$blue \(blue\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 86 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 86 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 86 tips and 85 internal nodes ] refseq() DNAStringSet: [ 86 reference sequences ]

\(blue\)amp ampvis2 object with 5 elements. Summary of OTU table: Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads 35 86 89493 0 11387 1819 Avg#Reads 2556.94

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 86(100%) 86(100%) 86(100%) 85(98.84%) 79(91.86%) 50(58.14%) 7(8.14%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(blue\)heat \(blue\)heat_rel \(blue\)tree \(blue\)taxa

Kingdom Phylum Class Order Family Genus Species
Seq314 Bacteria Cyanobacteria Vampirivibrionia Obscuribacterales Obscuribacteraceae NA NA
Seq23 Bacteria Cyanobacteria Vampirivibrionia Obscuribacterales Obscuribacteraceae NA NA
Seq65 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingomonas NA
Seq188 Bacteria Proteobacteria Alphaproteobacteria Reyranellales Reyranellaceae Reyranella soli
Seq334 Bacteria Proteobacteria Alphaproteobacteria Reyranellales Reyranellaceae Reyranella NA
Seq75 Bacteria Proteobacteria Alphaproteobacteria Reyranellales Reyranellaceae Reyranella NA
Seq167 Bacteria Proteobacteria Alphaproteobacteria Reyranellales Reyranellaceae Reyranella massiliensis
Seq273 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Kaistiaceae Kaistia NA
Seq131 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae Pseudolabrys NA
Seq138 Bacteria Proteobacteria Alphaproteobacteria Micropepsales Micropepsaceae NA NA
Seq104 Bacteria Proteobacteria Alphaproteobacteria Micropepsales Micropepsaceae NA NA
Seq129 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales Sphingomonadaceae Altererythrobacter NA
Seq28 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Devosiaceae Devosia NA
Seq360 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae Mesorhizobium NA
Seq292 Bacteria Proteobacteria Alphaproteobacteria Caulobacterales Caulobacteraceae NA NA
Seq166 Bacteria Proteobacteria Alphaproteobacteria Caulobacterales Hyphomonadaceae Hirschia NA
Seq39 Bacteria Gemmatimonadota Gemmatimonadetes Gemmatimonadales Gemmatimonadaceae NA NA
Seq30 Bacteria Actinobacteriota Thermoleophilia Solirubrobacterales Solirubrobacteraceae Conexibacter NA
Seq151 Bacteria Actinobacteriota Thermoleophilia Solirubrobacterales 67-14 NA NA
Seq250 Bacteria Actinobacteriota Thermoleophilia Solirubrobacterales 67-14 NA NA
Seq135 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq46 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq35 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq106 Bacteria Myxococcota Polyangia Polyangiales Sandaracinaceae NA NA
Seq345 Bacteria Bdellovibrionota Oligoflexia 0319-6G20 NA NA NA
Seq198 Bacteria Bdellovibrionota Bdellovibrionia Bdellovibrionales Bdellovibrionaceae Bdellovibrio NA
Seq97 Bacteria Bdellovibrionota Bdellovibrionia Bdellovibrionales Bdellovibrionaceae Bdellovibrio NA
Seq183 Bacteria Dependentiae Babeliae Babeliales UBA12409 NA NA
Seq424 Bacteria Actinobacteriota Actinobacteria Propionibacteriales Nocardioidaceae Kribbella NA
Seq136 Bacteria Actinobacteriota Acidimicrobiia Microtrichales Iamiaceae Iamia NA
Seq174 Bacteria Actinobacteriota Acidimicrobiia Microtrichales Iamiaceae Iamia NA
Seq43 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae Galbitalea NA
Seq420 Bacteria Actinobacteriota Actinobacteria Micromonosporales Micromonosporaceae Luedemannella NA
Seq127 Bacteria Actinobacteriota Actinobacteria Micromonosporales Micromonosporaceae Dactylosporangium NA
Seq288 Bacteria Actinobacteriota Actinobacteria Streptomycetales Streptomycetaceae NA NA
Seq449 Bacteria Actinobacteriota Actinobacteria Propionibacteriales Propionibacteriaceae Jiangella NA
Seq91 Bacteria Bacteroidota Bacteroidia Bacteroidales NA NA NA
Seq149 Bacteria Bacteroidota Bacteroidia Bacteroidetes VC2.1 Bac22 NA NA NA
Seq169 Bacteria Bacteroidota Bacteroidia Sphingobacteriales Sphingobacteriaceae Mucilaginibacter calamicampi
Seq81 Bacteria Bacteroidota Bacteroidia Sphingobacteriales NS11-12 marine group NA NA
Seq211 Bacteria Bacteroidota Bacteroidia NA NA NA NA
Seq119 Bacteria Bacteroidota Bacteroidia Sphingobacteriales env.OPS 17 NA NA
Seq77 Bacteria Spirochaetota Spirochaetia Spirochaetales Spirochaetaceae Salinispira NA
Seq175 Bacteria Spirochaetota Spirochaetia Spirochaetales Spirochaetaceae Spirochaeta 2 NA
Seq305 Bacteria Chloroflexi Anaerolineae Anaerolineales Anaerolineaceae NA NA
Seq379 Bacteria Chloroflexi Chloroflexia Chloroflexales Roseiflexaceae NA NA
Seq165 Bacteria Chloroflexi Chloroflexia Chloroflexales Roseiflexaceae NA NA
Seq327 Bacteria Chloroflexi Chloroflexia Thermomicrobiales JG30-KF-CM45 NA NA
Seq252 Bacteria Armatimonadota Fimbriimonadia Fimbriimonadales Fimbriimonadaceae NA NA
Seq382 Bacteria Verrucomicrobiota Verrucomicrobiae Pedosphaerales Pedosphaeraceae NA NA
Seq123 Bacteria Verrucomicrobiota Verrucomicrobiae Opitutales Opitutaceae Lacunisphaera limnophila
Seq124 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Terrimicrobiaceae Terrimicrobium NA
Seq325 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Chthoniobacteraceae LD29 NA
Seq272 Bacteria Verrucomicrobiota Verrucomicrobiae Verrucomicrobiales Verrucomicrobiaceae Roseimicrobium gellanilyticum
Seq402 Bacteria Verrucomicrobiota Verrucomicrobiae Verrucomicrobiales Rubritaleaceae Luteolibacter NA
Seq322 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae NA NA
Seq356 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae NA NA
Seq259 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae Pir4 lineage NA
Seq279 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae Pir4 lineage NA
Seq214 Bacteria Planctomycetota Planctomycetes Planctomycetales Rubinisphaeraceae SH-PL14 NA
Seq209 Bacteria Planctomycetota Planctomycetes Planctomycetales Schlesneriaceae Schlesneria NA
Seq229 Bacteria Acidobacteriota Vicinamibacteria Vicinamibacterales Vicinamibacteraceae NA NA
Seq275 Bacteria Acidobacteriota Vicinamibacteria Vicinamibacterales Vicinamibacteraceae NA NA
Seq247 Bacteria Acidobacteriota Vicinamibacteria Vicinamibacterales NA NA NA
Seq316 Bacteria Bacteroidota Kapabacteria Kapabacteriales NA NA NA
Seq371 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq12 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq153 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq326 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq10 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae NA NA
Seq82 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Terrimonas NA
Seq462 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Pseudoflavitalea NA
Seq150 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Flavitalea NA
Seq290 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Edaphobaculum NA
Seq339 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Edaphobaculum NA
Seq120 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Taibaiella NA
Seq233 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales 211ds20 NA NA
Seq133 Bacteria Proteobacteria Gammaproteobacteria Steroidobacterales Steroidobacteraceae Steroidobacter flavus
Seq349 Bacteria Proteobacteria Gammaproteobacteria Steroidobacterales Steroidobacteraceae Steroidobacter NA
Seq315 Bacteria Proteobacteria Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq57 Bacteria Proteobacteria Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq96 Bacteria Proteobacteria Gammaproteobacteria R7C24 NA NA NA
Seq170 Bacteria Proteobacteria Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq94 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Rhodanobacteraceae Dokdonella ginsengisoli
Seq114 Bacteria Proteobacteria Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae NA NA
Seq235 Bacteria Proteobacteria Gammaproteobacteria Legionellales Legionellaceae Legionella NA

$cyan \(cyan\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 30 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 30 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 30 tips and 29 internal nodes ] refseq() DNAStringSet: [ 30 reference sequences ]

\(cyan\)amp ampvis2 object with 5 elements. Summary of OTU table: Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads 35 30 176073 336 15108 4911 Avg#Reads 5030.66

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 30(100%) 30(100%) 30(100%) 30(100%) 30(100%) 29(96.67%) 2(6.67%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(cyan\)heat \(cyan\)heat_rel \(cyan\)tree \(cyan\)taxa

Kingdom Phylum Class Order Family Genus Species
Seq98 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Burkholderiaceae Burkholderia telluris
Seq64 Bacteria Proteobacteria Alphaproteobacteria Azospirillales Inquilinaceae Inquilinus ginsengisoli
Seq4 Bacteria Proteobacteria Alphaproteobacteria Azospirillales Inquilinaceae Inquilinus NA
Seq244 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae Pseudorhodoplanes NA
Seq11 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae Bradyrhizobium NA
Seq74 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae Bradyrhizobium NA
Seq22 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae Starkeya NA
Seq45 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae Allorhizobium NA
Seq9 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae Allorhizobium NA
Seq29 Bacteria Myxococcota Polyangia Nannocystales Nannocystaceae Nannocystis NA
Seq239 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Labilithrix NA
Seq50 Bacteria Actinobacteriota Actinobacteria Corynebacteriales Mycobacteriaceae Mycobacterium NA
Seq34 Bacteria Actinobacteriota Actinobacteria Streptosporangiales Streptosporangiaceae Herbidospora NA
Seq56 Bacteria Firmicutes Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq25 Bacteria Firmicutes Bacilli Bacillales Bacillaceae Terribacillus NA
Seq3 Bacteria Firmicutes Bacilli Bacillales Bacillaceae Bacillus NA
Seq5 Bacteria Firmicutes Bacilli Bacillales Bacillaceae Bacillus NA
Seq13 Bacteria Firmicutes Bacilli Bacillales Planococcaceae NA NA
Seq7 Bacteria Firmicutes Bacilli Bacillales Planococcaceae Solibacillus NA
Seq19 Bacteria Firmicutes Bacilli Bacillales Planococcaceae Solibacillus NA
Seq17 Bacteria Planctomycetota Planctomycetes Isosphaerales Isosphaeraceae Singulisphaera NA
Seq324 Bacteria Acidobacteriota Acidobacteriae Acidobacteriales Acidobacteriaceae (Subgroup 1) Edaphobacter NA
Seq16 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq6 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq1 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq109 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq49 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Taibaiella NA
Seq73 Bacteria Bacteroidota Bacteroidia Cytophagales Spirosomaceae Dyadobacter NA
Seq15 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Rhodanobacteraceae Luteibacter NA
Seq26 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Xanthomonadaceae Luteimonas NA

$darkgreen \(darkgreen\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 44 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 44 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 44 tips and 43 internal nodes ] refseq() DNAStringSet: [ 44 reference sequences ]

\(darkgreen\)amp ampvis2 object with 5 elements. Summary of OTU table: Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads 35 44 12360 0 2468 65 Avg#Reads 353.14

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 44(100%) 44(100%) 44(100%) 43(97.73%) 40(90.91%) 24(54.55%) 3(6.82%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(darkgreen\)heat \(darkgreen\)heat_rel \(darkgreen\)tree \(darkgreen\)taxa

Kingdom Phylum Class Order Family Genus Species
Seq342 Archaea Crenarchaeota Nitrososphaeria Nitrososphaerales Nitrososphaeraceae NA NA
Seq196 Archaea Crenarchaeota Nitrososphaeria Nitrososphaerales Nitrososphaeraceae Candidatus Nitrocosmicus NA
Seq276 Archaea Crenarchaeota Nitrososphaeria Nitrososphaerales Nitrososphaeraceae NA NA
Seq419 Archaea Crenarchaeota Nitrososphaeria Nitrososphaerales Nitrososphaeraceae NA NA
Seq454 Bacteria Proteobacteria Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae Aquicella NA
Seq329 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingomonas jaspsi
Seq332 Bacteria Proteobacteria Alphaproteobacteria Rhodospirillales Magnetospiraceae NA NA
Seq429 Bacteria Proteobacteria Alphaproteobacteria Acetobacterales Acetobacteraceae NA NA
Seq195 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae Pseudorhodoplanes NA
Seq144 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae Pseudolabrys NA
Seq369 Bacteria Proteobacteria Alphaproteobacteria Micropepsales Micropepsaceae NA NA
Seq362 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales Sphingomonadaceae Altererythrobacter NA
Seq428 Bacteria Proteobacteria Alphaproteobacteria Caulobacterales Caulobacteraceae Phenylobacterium NA
Seq347 Bacteria Nitrospirota Nitrospiria Nitrospirales Nitrospiraceae Nitrospira japonica
Seq328 Bacteria Myxococcota Polyangia Haliangiales Haliangiaceae Haliangium NA
Seq348 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq308 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq256 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Minicystis NA
Seq302 Bacteria Bdellovibrionota Oligoflexia 0319-6G20 NA NA NA
Seq435 Bacteria Actinobacteriota Actinobacteria Propionibacteriales Propionibacteriaceae Microlunatus NA
Seq291 Bacteria Actinobacteriota Acidimicrobiia IMCC26256 NA NA NA
Seq392 Bacteria Actinobacteriota Acidimicrobiia Microtrichales Ilumatobacteraceae NA NA
Seq321 Bacteria Bacteroidota Bacteroidia Sphingobacteriales NS11-12 marine group NA NA
Seq186 Bacteria Firmicutes Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq204 Bacteria Firmicutes Bacilli Bacillales Planococcaceae Paenisporosarcina NA
Seq146 Bacteria Spirochaetota Spirochaetia Spirochaetales Spirochaetaceae Spirochaeta 2 NA
Seq359 Bacteria Patescibacteria Saccharimonadia Saccharimonadales LWQ8 NA NA
Seq269 Bacteria Chloroflexi Chloroflexia Chloroflexales Roseiflexaceae NA NA
Seq267 Bacteria Verrucomicrobiota Verrucomicrobiae Opitutales Opitutaceae Lacunisphaera NA
Seq320 Bacteria Verrucomicrobiota Verrucomicrobiae Opitutales Opitutaceae Opitutus NA
Seq203 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Terrimicrobiaceae Terrimicrobium NA
Seq497 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae NA NA
Seq384 Bacteria Planctomycetota Planctomycetes Planctomycetales NA NA NA
Seq155 Bacteria Acidobacteriota Blastocatellia Blastocatellales Blastocatellaceae NA NA
Seq350 Bacteria Bacteroidota Bacteroidia Sphingobacteriales env.OPS 17 NA NA
Seq224 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Terrimonas NA
Seq409 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Edaphobaculum NA
Seq337 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Taibaiella NA
Seq494 Bacteria Bacteroidota Bacteroidia Cytophagales Amoebophilaceae Candidatus Amoebophilus NA
Seq277 Bacteria Proteobacteria Gammaproteobacteria NA NA NA NA
Seq191 Bacteria Proteobacteria Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq335 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Xanthomonadaceae Luteimonas vadosa
Seq157 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Rhodanobacteraceae Dokdonella NA
Seq385 Bacteria Proteobacteria Gammaproteobacteria Salinisphaerales Solimonadaceae NA NA

$plum \(plum\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 30 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 30 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 30 tips and 29 internal nodes ] refseq() DNAStringSet: [ 30 reference sequences ]

\(plum\)amp ampvis2 object with 5 elements. Summary of OTU table: Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads 35 30 17172 0 3034 103 Avg#Reads 490.63

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 30(100%) 30(100%) 30(100%) 30(100%) 27(90%) 16(53.33%) 1(3.33%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(plum\)heat \(plum\)heat_rel \(plum\)tree \(plum\)taxa

Kingdom Phylum Class Order Family Genus Species
Seq161 Bacteria Proteobacteria Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae Aquicella NA
Seq181 Bacteria Proteobacteria Gammaproteobacteria CCD24 NA NA NA
Seq207 Bacteria Cyanobacteria Vampirivibrionia Obscuribacterales Obscuribacteraceae NA NA
Seq219 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingomonas naasensis
Seq216 Bacteria Proteobacteria Alphaproteobacteria Dongiales Dongiaceae Dongia NA
Seq220 Bacteria Proteobacteria Alphaproteobacteria Reyranellales Reyranellaceae Reyranella NA
Seq90 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae Pseudolabrys NA
Seq271 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae NA NA
Seq111 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingobium NA
Seq33 Bacteria Proteobacteria Alphaproteobacteria Caulobacterales Caulobacteraceae Caulobacter NA
Seq189 Bacteria Actinobacteriota Thermoleophilia Solirubrobacterales Solirubrobacteraceae Solirubrobacter NA
Seq85 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq208 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae Galbitalea NA
Seq218 Bacteria Bacteroidota Bacteroidia Sphingobacteriales Sphingobacteriaceae Mucilaginibacter NA
Seq215 Bacteria Firmicutes Bacilli Bacillales Bacillaceae Bacillus NA
Seq177 Bacteria Fibrobacterota Fibrobacteria Fibrobacterales Fibrobacteraceae NA NA
Seq375 Bacteria Chloroflexi Chloroflexia Thermomicrobiales JG30-KF-CM45 NA NA
Seq430 Bacteria Planctomycetota Phycisphaerae Phycisphaerales Phycisphaeraceae NA NA
Seq368 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae NA NA
Seq152 Bacteria Planctomycetota Planctomycetes Gemmatales Gemmataceae Gemmata NA
Seq304 Bacteria Planctomycetota Planctomycetes Planctomycetales NA NA NA
Seq141 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Terrimonas NA
Seq343 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae NA NA
Seq412 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Edaphobaculum NA
Seq457 Bacteria Proteobacteria Gammaproteobacteria Coxiellales Coxiellaceae Coxiella NA
Seq130 Bacteria Proteobacteria Gammaproteobacteria R7C24 NA NA NA
Seq346 Bacteria Proteobacteria Gammaproteobacteria Salinisphaerales Solimonadaceae Alkanibacter NA
Seq474 Bacteria Proteobacteria Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae NA NA
Seq472 Bacteria Proteobacteria Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae NA NA
Seq227 Bacteria Proteobacteria Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae NA NA

$red \(red\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 75 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 75 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 75 tips and 74 internal nodes ] refseq() DNAStringSet: [ 75 reference sequences ]

\(red\)amp ampvis2 object with 5 elements. Summary of OTU table: Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads 35 75 66650 241 5339 1528 Avg#Reads 1904.29

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 75(100%) 75(100%) 75(100%) 74(98.67%) 74(98.67%) 66(88%) 11(14.67%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(red\)heat \(red\)heat_rel \(red\)tree \(red\)taxa

Kingdom Phylum Class Order Family Genus Species
Seq117 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Burkholderiaceae Burkholderia NA
Seq60 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Alcaligenaceae NA NA
Seq121 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Comamonadaceae Variovorax NA
Seq383 Bacteria Proteobacteria Alphaproteobacteria Ferrovibrionales Ferrovibrionaceae Ferrovibrio soli
Seq228 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae NA NA
Seq178 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Beijerinckiaceae Methylobacterium-Methylorubrum NA
Seq171 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Beijerinckiaceae Bosea thiooxidans
Seq341 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Beijerinckiaceae Bosea NA
Seq89 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae Tardiphaga robiniae
Seq31 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Xanthobacteraceae Starkeya NA
Seq160 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Hyphomicrobiaceae Hyphomicrobium NA
Seq251 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingopyxis NA
Seq373 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales Sphingomonadaceae Altererythrobacter NA
Seq108 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae Mesorhizobium NA
Seq71 Bacteria Actinobacteriota Thermoleophilia Solirubrobacterales Solirubrobacteraceae Conexibacter NA
Seq338 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Pajaroellobacter NA
Seq230 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Pajaroellobacter NA
Seq88 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Sorangium NA
Seq303 Bacteria Actinobacteriota Acidimicrobiia Microtrichales Ilumatobacteraceae NA NA
Seq93 Bacteria Actinobacteriota Actinobacteria Micrococcales Promicromonosporaceae Promicromonospora NA
Seq366 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae Leifsonia NA
Seq134 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae NA aoyamense
Seq411 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae Agromyces NA
Seq115 Bacteria Actinobacteriota Actinobacteria Corynebacteriales Mycobacteriaceae Mycobacterium NA
Seq128 Bacteria Actinobacteriota Actinobacteria Streptosporangiales Streptosporangiaceae Herbidospora mongoliensis
Seq398 Bacteria Actinobacteriota Actinobacteria Streptosporangiales Streptosporangiaceae Nonomuraea NA
Seq78 Bacteria Actinobacteriota Actinobacteria Streptosporangiales Thermomonosporaceae Actinocorallia NA
Seq193 Bacteria Actinobacteriota Actinobacteria Streptomycetales Streptomycetaceae Streptomyces NA
Seq148 Bacteria Bacteroidota Bacteroidia Sphingobacteriales Sphingobacteriaceae Pedobacter NA
Seq307 Bacteria Bacteroidota Bacteroidia Sphingobacteriales NS11-12 marine group NA NA
Seq431 Bacteria Bacteroidota Bacteroidia Sphingobacteriales NS11-12 marine group NA NA
Seq205 Bacteria Firmicutes Bacilli Paenibacillales Paenibacillaceae Paenibacillus lautus
Seq86 Bacteria Firmicutes Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq51 Bacteria Firmicutes Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq18 Bacteria Firmicutes Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq192 Bacteria Firmicutes Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq266 Bacteria Firmicutes Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq278 Bacteria Firmicutes Bacilli Bacillales Planococcaceae Domibacillus NA
Seq99 Bacteria Firmicutes Bacilli Bacillales Bacillaceae Bacillus NA
Seq53 Bacteria Firmicutes Bacilli Bacillales Planococcaceae Lysinibacillus NA
Seq118 Bacteria Firmicutes Bacilli Bacillales Planococcaceae Lysinibacillus NA
Seq76 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Terrimicrobiaceae Terrimicrobium NA
Seq232 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Terrimicrobiaceae Terrimicrobium NA
Seq168 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Terrimicrobiaceae Terrimicrobium NA
Seq387 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Chthoniobacteraceae Chthoniobacter NA
Seq285 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Chthoniobacteraceae Chthoniobacter NA
Seq217 Bacteria Verrucomicrobiota Verrucomicrobiae Verrucomicrobiales Verrucomicrobiaceae Verrucomicrobium spinosum
Seq190 Bacteria Planctomycetota Planctomycetes Gemmatales Gemmataceae Gemmata NA
Seq249 Bacteria Planctomycetota Planctomycetes Isosphaerales Isosphaeraceae Singulisphaera NA
Seq254 Bacteria Acidobacteriota Acidobacteriae Acidobacteriales Acidobacteriaceae (Subgroup 1) Edaphobacter NA
Seq67 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq176 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq268 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Pseudoflavitalea NA
Seq32 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Pseudoflavitalea NA
Seq84 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Pseudoflavitalea NA
Seq459 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella NA
Seq40 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella hibisci
Seq173 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella NA
Seq246 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella NA
Seq112 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Flavitalea NA
Seq126 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga ginsengihumi
Seq102 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga arvensicola
Seq309 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga soli
Seq70 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq262 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq301 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Taibaiella NA
Seq572 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Moraxellaceae NA NA
Seq436 Bacteria Proteobacteria Gammaproteobacteria Steroidobacterales Steroidobacteraceae Steroidobacter NA
Seq261 Bacteria Proteobacteria Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq298 Bacteria Proteobacteria Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq354 Bacteria Proteobacteria Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq226 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Rhodanobacteraceae Tahibacter NA
Seq147 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Xanthomonadaceae Xanthomonas NA
Seq370 Bacteria Proteobacteria Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae NA NA
Seq486 Bacteria Proteobacteria Gammaproteobacteria NA NA NA NA

$salmon \(salmon\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 73 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 73 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 73 tips and 72 internal nodes ] refseq() DNAStringSet: [ 73 reference sequences ]

\(salmon\)amp ampvis2 object with 5 elements. Summary of OTU table: Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads 35 73 115332 0 20534 779 Avg#Reads 3295.2

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 73(100%) 73(100%) 73(100%) 72(98.63%) 71(97.26%) 66(90.41%) 12(16.44%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(salmon\)heat \(salmon\)heat_rel \(salmon\)tree \(salmon\)taxa

Kingdom Phylum Class Order Family Genus Species
Seq79 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Oxalobacteraceae Massilia armeniaca
Seq122 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Oxalobacteraceae NA NA
Seq179 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Oxalobacteraceae Pseudoduganella NA
Seq145 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Oxalobacteraceae Pseudoduganella eburnea
Seq154 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Oxalobacteraceae Massilia NA
Seq222 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Oxalobacteraceae Massilia NA
Seq159 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Burkholderiaceae Cupriavidus NA
Seq8 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Burkholderiaceae Cupriavidus NA
Seq14 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Alcaligenaceae Achromobacter NA
Seq54 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Comamonadaceae NA paradoxus
Seq95 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Comamonadaceae Xylophilus NA
Seq340 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Comamonadaceae Rhizobacter NA
Seq264 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingomonas mucosissima
Seq333 Bacteria Proteobacteria Alphaproteobacteria Reyranellales Reyranellaceae Reyranella NA
Seq158 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Beijerinckiaceae Microvirga NA
Seq248 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae Allorhizobium NA
Seq245 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Devosiaceae Devosia neptuniae
Seq185 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae Shinella NA
Seq156 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae NA NA
Seq42 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae NA NA
Seq286 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae Ensifer NA
Seq21 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae Allorhizobium NA
Seq107 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae Neorhizobium NA
Seq231 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae Allorhizobium azooxidifex
Seq140 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales Rhizobiaceae NA NA
Seq378 Bacteria Proteobacteria Alphaproteobacteria Caulobacterales Caulobacteraceae Phenylobacterium mobile
Seq374 Bacteria Myxococcota Polyangia Haliangiales Haliangiaceae Haliangium NA
Seq257 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Pajaroellobacter NA
Seq223 Bacteria Actinobacteriota Actinobacteria Micrococcales Promicromonosporaceae Cellulosimicrobium NA
Seq83 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae Microbacterium NA
Seq68 Bacteria Actinobacteriota Actinobacteria Corynebacteriales Mycobacteriaceae Mycobacterium NA
Seq293 Bacteria Actinobacteriota Actinobacteria Glycomycetales Glycomycetaceae Glycomyces NA
Seq72 Bacteria Actinobacteriota Actinobacteria Streptomycetales Streptomycetaceae Streptomyces NA
Seq101 Bacteria Actinobacteriota Actinobacteria Streptomycetales Streptomycetaceae Streptomyces NA
Seq265 Bacteria Actinobacteriota Actinobacteria Streptomycetales Streptomycetaceae Streptomyces NA
Seq260 Bacteria Bacteroidota Bacteroidia Sphingobacteriales NA NA NA
Seq180 Bacteria Bacteroidota Bacteroidia Sphingobacteriales Sphingobacteriaceae Pedobacter panaciterrae
Seq142 Bacteria Verrucomicrobiota Verrucomicrobiae Verrucomicrobiales Verrucomicrobiaceae Roseimicrobium NA
Seq163 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq253 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq116 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq184 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq52 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Flavobacterium NA
Seq187 Bacteria Bacteroidota Bacteroidia Flavobacteriales Weeksellaceae Chryseobacterium ginsenosidimutans
Seq172 Bacteria Bacteroidota Bacteroidia Flavobacteriales Weeksellaceae Chryseobacterium NA
Seq283 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Flavitalea NA
Seq113 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella NA
Seq110 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella NA
Seq132 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Pseudoflavitalea NA
Seq37 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq162 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq182 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga humicola
Seq92 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq80 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga pinensis
Seq2 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq221 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq20 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq282 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq38 Bacteria Bacteroidota Bacteroidia Cytophagales Spirosomaceae Dyadobacter NA
Seq237 Bacteria Proteobacteria Gammaproteobacteria Steroidobacterales Steroidobacteraceae Steroidobacter agariperforans
Seq234 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Xanthomonadaceae Pseudoxanthomonas NA
Seq59 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Xanthomonadaceae Stenotrophomonas NA
Seq105 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Xanthomonadaceae Stenotrophomonas NA
Seq344 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Xanthomonadaceae Lysobacter NA
Seq41 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Xanthomonadaceae Lysobacter NA
Seq243 Bacteria Proteobacteria Gammaproteobacteria NA NA NA NA
Seq406 Bacteria Proteobacteria Gammaproteobacteria Legionellales Legionellaceae Legionella NA
Seq139 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
Seq24 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
Seq27 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
Seq87 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
Seq55 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
Seq201 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae Klebsiella NA

0.6.3 mdp and other indices for clusters

Я не могу сделать стат. поддержку mdp для кластеров, но в ранних кластераx(salmon, cyan) mdp ниже, чем в поздних.

external_empty_dataframe <- data.frame(cluster=factor(),
                                       mdp=numeric(),
                                       richness=numeric(),
                                       stringsAsFactors = FALSE)

for (i in names(l_vst)) {
  ps.in <- l_vst[[i]][["ps"]]
  m <- ps.in@otu_table@.Data %>% 
    colSums() %>% 
    as.data.frame() %>% 
    as.matrix() %>% 
    t()
  
  mdp_index <- picante::mpd(m, cophenetic(ps.in@phy_tree)) 
  ricness_index <- ps.in@tax_table %>% 
    rownames() %>% 
    length()
  d <- data.frame(cluster = i,
                  mdp = mdp_index,
                  richness = ricness_index)
  external_empty_dataframe <- rbind(external_empty_dataframe, d)
}

external_empty_dataframe %>% 
  dplyr::arrange(mdp)
##     cluster      mdp richness
## 1      cyan 1.567685       30
## 2    salmon 1.684152       73
## 3       red 1.748594       75
## 4      plum 1.779306       30
## 5      blue 1.798298       86
## 6 darkgreen 2.203539       44
colSums(l_vst$salmon$ps@otu_table@.Data)
##  Seq79 Seq122 Seq179 Seq145 Seq154 Seq222 Seq159   Seq8  Seq14  Seq54  Seq95 
##   1570    975    549    765    703    390    669   9841   6482   2589   1262 
## Seq340 Seq264 Seq333 Seq158 Seq248 Seq245 Seq185 Seq156  Seq42 Seq286  Seq21 
##    195    304    200    669    337    341    516    688   3429    261   5170 
## Seq107 Seq231 Seq140 Seq378 Seq374 Seq257 Seq223  Seq83  Seq68 Seq293  Seq72 
##   1064    363    798    158    160    310    390   1469   1941    249   1822 
## Seq101 Seq265 Seq260 Seq180 Seq142 Seq163 Seq253 Seq116 Seq184  Seq52 Seq187 
##   1179    301    309    542    791    638    317   1038    523   2663    512 
## Seq172 Seq283 Seq113 Seq110 Seq132  Seq37 Seq162 Seq182  Seq92  Seq80   Seq2 
##    572    266   1047   1054    875   3566    653    532   1311   1524  16855 
## Seq221  Seq20 Seq282  Seq38 Seq237 Seq234  Seq59 Seq105 Seq344  Seq41 Seq243 
##    392   5184    266   3539    350    357   2372   1105    189   3465    345 
## Seq406 Seq139  Seq24  Seq27  Seq87  Seq55 Seq201 
##    137    799   4452   4271   1373   2582    457

0.7 Add a metadata

list.files("meta/")
## [1] "cell_realtime_stat.xlsx" "cell_resp_ch_stat.xlsx" 
## [3] "period_legend.xlsx"

period_legend - соответствие номеров мешочков в 16S с днями, думаю лучше заменить везде обозначения 1, 3 и тд на дни cell_resp - данные по дыханию, по ним какую-нибудь простую статистику. Да, повторностей для эксперимента и контроля разное количество cell_realtime - это циклы выхода целлюлаз по реалтайму, я их нормировала по 16S, есть в этой же таблице. Что с ними делать особо не знаю, вроде вообще решили выкинуть

realtime.data <- readxl::read_excel("meta/cell_realtime_stat.xlsx")
period.data <- readxl::read_excel("meta/period_legend.xlsx")
resp.data <- readxl::read_excel("meta/cell_resp_ch_stat.xlsx")
realtime.data
## # A tibble: 36 × 29
##    id        day contr…¹ CELL_…² CELL_…³ CELL_…⁴ CELL_…⁵ CELL_…⁶ CELL_…⁷ CELL_…⁸
##    <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 G01-1-1     3    15.6    33.2    31.1    33.6    36.6    33.8    38.0    33.0
##  2 G01-1-2     3    15.5    33.3    31.3    32.9    36.2    33.8    41      32.7
##  3 G01-1-3     3    15.5    33.6    31.2    32.9    35.8    33.3    38.2    32.9
##  4 G01-2-1     3    17.1    36.0    31.1    33.1    36.9    36.4    38.8    33.8
##  5 G01-2-2     3    17.3    35.6    31.0    32.8    37.8    35.7    40.1    33.6
##  6 G01-2-3     3    17.1    36      31.1    32.8    38.1    35.2    37.6    33.6
##  7 G01-3-1     3    16.4    35.4    31.6    34.9    39.1    35.3    40.9    33.2
##  8 G01-3-2     3    16.5    35.6    31.3    34.4    37.9    34.6    38.9    33.1
##  9 G01-3-3     3    16.4    35.8    31.5    34.4    41.6    35.0    36.5    33.4
## 10 G05-1-1    28    18.2    26.3    29.1    29.4    28.2    28.4    37.4    34.5
## # … with 26 more rows, 19 more variables: CELL_193122 <dbl>, CELL_73229 <dbl>,
## #   CELL_47814 <dbl>, CELL_163125 <dbl>, CELL_73266 <dbl>, CELL_88582 <dbl>,
## #   CELL_63583 <dbl>, CELL_14199 <dbl>, CELL_95616 <dbl>, CELL_63504 <dbl>,
## #   CELL_08643 <chr>, CELL_199599 <dbl>, CELL_01426 <dbl>, CELL_71601 <dbl>,
## #   CELL_45099 <dbl>, CELL_191900 <dbl>, CELL_99463 <dbl>, CELL_74579 <dbl>,
## #   CELL_183403 <dbl>, and abbreviated variable names ¹​contr_16S, ²​CELL_172283,
## #   ³​CELL_203163, ⁴​CELL_83325, ⁵​CELL_188413, ⁶​CELL_109631, ⁷​CELL_188119, …

0.7.1 Realtime data

impute.mean <- function(x) replace(x, is.na(x), mean(x, na.rm = TRUE))

realtime_data <- realtime.data %>% 
  mutate(CELL_08643 = as.numeric(CELL_08643)) %>% 
  group_by(day) %>%
  mutate(nice_cell = impute.mean(CELL_08643)) %>% 
  mutate(day = as.factor(day))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
realtime_zjena <- readxl::read_excel("cellulases_gene_expression(1).xlsx")
geom_mean <-function(x){exp(mean(log(x)))}

realtime_zjena_geom <- realtime_zjena %>% 
  mutate(repeats = paste0(realtime_zjena$week, "-", rep(1:3, 3, each=3))) %>% 
  relocate(repeats, 1) %>% 
  group_by(repeats) %>% 
  summarise_if(is.numeric, geom_mean) %>% 
  mutate(week = as.factor(week)) %>% 
  arrange(week)

UPGMA целлюлаз(дистанция - брей).
Не очень понимаю, что это значит. В зависимости от дистанции получаются совсем разные кластеризации.

realtime_matrix <- realtime_zjena_geom %>% 
  column_to_rownames("repeats") %>% 
  select_if(is.numeric) %>% 
  as.matrix()

hcl <- hclust(vegan::vegdist(t(realtime_matrix), method="bray"), "average")
plot(hcl)

А вот - евклидова дистанция.
Ничего общего с предыдущей картинкой

hcl <- hclust(vegan::vegdist(t(realtime_matrix), method="euclidian"), "average")
plot(hcl)

Корплот целлюлаз.
Только корреляции

m = cor(realtime_matrix)
corrplot::corrplot(m)

Только достоверные корреляции
В общем - кластеры есть - но корреляцие недостоверные(за некоторыми исключениями)

cor_test_mat <- psych::corr.test(realtime_matrix)$p
corrplot::corrplot(m, p.mat = cor_test_mat, method = 'circle', type = 'lower', insig='blank',
         order = 'AOE', diag = FALSE)$corrPos -> p1

# text(p1$x, p1$y, round(p1$corr, 2))

То же, но матрица уже прологарифмирована.

cor_test_mat <- psych::corr.test(log(realtime_matrix))
corrplot::corrplot(cor_test_mat$r, p.mat = cor_test_mat$p, method = 'circle', type = 'lower', insig='blank',
         order = 'AOE', diag = FALSE)$corrPos -> p1

# text(p1$x, p1$y, round(p1$corr, 2))

add phylogenic tree (mafft - iqtree(ML))

realtime_tree <- ape::read.tree("al_chz.fasta.contree")
plot(realtime_tree)

library(tidyverse)

realtime_data %>% 
  select(c("day", "id", "contr_16S", "CELL_172283")) %>% 
  mutate(bio_repl = gsub("-[1-3]$", "", id)) %>% 
  group_by(bio_repl, day) %>% 
  summarise(contr_tmean = mean(contr_16S),
            data_tmean = mean(CELL_172283)) %>% 
  mutate(dCt = data_tmean - contr_tmean)
## `summarise()` has grouped output by 'bio_repl'. You can override using the
## `.groups` argument.
## # A tibble: 12 × 5
## # Groups:   bio_repl [12]
##    bio_repl day   contr_tmean data_tmean   dCt
##    <chr>    <fct>       <dbl>      <dbl> <dbl>
##  1 G01-1    3            15.5       33.4 17.8 
##  2 G01-2    3            17.2       35.9 18.7 
##  3 G01-3    3            16.4       35.6 19.2 
##  4 G05-1    28           18.4       26.2  7.81
##  5 G05-2    28           16.5       26.2  9.73
##  6 G05-3    28           18.5       32.0 13.6 
##  7 G10-1    91           17.4       26.2  8.72
##  8 G10-2    91           16.3       24.1  7.79
##  9 G10-3    91           17.5       26.3  8.81
## 10 G14-1    161          18.0       28.1 10.1 
## 11 G14-2    161          16.5       27.0 10.6 
## 12 G14-3    161          16.9       25.4  8.41
resp_data <- resp.data %>% 
  group_by(day) %>%
  mutate(control = impute.mean(control)) %>% 
  mutate(straw = impute.mean(straw)) 

resp_data
## # A tibble: 164 × 3
## # Groups:   day [27]
##      day control straw
##    <dbl>   <dbl> <dbl>
##  1     0    250   250 
##  2     3    285.  723.
##  3     3    263.  832.
##  4     3    131.  657.
##  5     3    307.  525.
##  6     3    241.  744.
##  7     3    245.  701.
##  8     3    245.  657.
##  9     7    274.  690.
## 10     7    296.  690.
## # … with 154 more rows

0.7.2 Knee plot

Дыхание - median(straw)/median(control)

Mожно использовать сторонний пакет(KneeArrower) что понять в каком месте knee_plot происходит перелом. Для элиминации повторностей возьмем медиану.   Я хз как этот пакет работает(ищет производную, но как сглаживает - хз, там матан), но он говорит что перелом происходит скорее на 60-80 днях.

(я исправил ошибки - стало ближе к твоим данным)

https://github.com/agentlans/KneeArrower - вот здесь можно почитать про матан

# resp_data %>% 
#   filter(!day == 0) %>% 
#   group_by(day) %>% 
#   summarise(median_control = median(control),
#             median_straw = median(straw)) %>% 
#   mutate(rel = median_straw/median_control) %>% 
#   ggplot() +
#   geom_point(aes(x = day, y = rel))

resp_median <- resp_data %>% 
  filter(!day == 0) %>% 
  group_by(day) %>% 
  summarise(median_control = median(control),
            median_straw = median(straw)) %>% 
  mutate(rel = median_straw/median_control)


thresholds <- c(0.25, 0.5, 0.75, 1)

# Find cutoff points at each threshold
cutoff.points <- lapply(thresholds, function(i) {
  findCutoff(resp_median$day, resp_median$rel, method="first", i)
})

x.coord <- sapply(cutoff.points, function(p) p$x)
y.coord <- sapply(cutoff.points, function(p) p$y)

# Plot the cutoff points on the scatterplot
plot(resp_median$day, resp_median$rel, pch=20, col="gray")
points(x.coord, y.coord, col="red", pch=20)
text(x.coord, y.coord, labels=thresholds, pos=4, col="red")

period_data <- period.data %>%
  mutate(bag_id = as.factor(bag_id))

period_data
## # A tibble: 15 × 2
##    bag_id   day
##    <fct>  <dbl>
##  1 1          3
##  2 2          7
##  3 3         14
##  4 4         21
##  5 5         28
##  6 6         35
##  7 7         49
##  8 8         63
##  9 9         77
## 10 10        91
## 11 11       105
## 12 12       119
## 13 13       140
## 14 14       161
## 15 15       182

0.7.3 альфа/дыхание

Ну я вот не очень понимаю что делать дальше - привязать кластеры к этой картинке?

resp_median_bags <- resp_median %>% 
  left_join(period.data, by="day") %>% 
  mutate(bag_id = as.factor(bag_id))

ps.f.r <- rarefy_even_depth(ps.f)

estimate_richness(ps.f.r) %>% 
  rownames_to_column("ID") %>% 
  mutate(bag_id = as.factor(
    as.numeric(
      gsub("\\..+","",
           gsub("straw\\.16s\\.D","", ID)
           )
      )
    )
    ) %>% 
  group_by(bag_id) %>% 
  summarise(Observed = mean(Observed),
            Shannon = mean(Shannon),
            InvSimpson = mean(InvSimpson)) %>% 
  left_join(period_data, by="bag_id") %>% 
  left_join(resp_median_bags, by="bag_id") %>% 
  mutate(
    Observed_scaled = scale(Observed),
    Shannon_scaled = scale(Shannon),
    InvSimpson_scaled = scale(InvSimpson),
    Respiration_scaled = scale(rel)
  ) %>% 
  select(c(bag_id, Observed_scaled, Shannon_scaled, InvSimpson_scaled, Respiration_scaled)) %>% 
  pivot_longer(c("Observed_scaled", "Shannon_scaled", "InvSimpson_scaled", "Respiration_scaled")) %>% 
  ggplot(aes(y = value, x = bag_id, group = name)) +
  geom_line(aes(color = name),
            alpha = 0.8) +
  theme_bw()

Корреляция -ртрицательная слабенькая, не особо достоверная и только Пирсона(которая работает для нормального распределения(у нас хроносерия, надо спирмана по хорошему))

alpha_resp <- phyloseq::estimate_richness(ps.f.r) %>% 
  rownames_to_column("ID") %>% 
  mutate(bag_id = as.factor(
    as.numeric(
      gsub("\\..+","",
           gsub("straw\\.16s\\.D","", ID)
           )
      )
    )
    ) %>% 
  group_by(bag_id) %>% 
  summarise(Observed = mean(Observed),
            Shannon = mean(Shannon),
            InvSimpson = mean(InvSimpson)) %>% 
  left_join(period_data, by="bag_id") %>% 
  left_join(resp_median_bags, by="bag_id") %>% 
  mutate(
    Observed_scaled = scale(Observed),
    Shannon_scaled = scale(Shannon),
    InvSimpson_scaled = scale(InvSimpson),
    Respiration_scaled = scale(rel)
  ) %>% 
  select(c(bag_id, Observed_scaled, Shannon_scaled, InvSimpson_scaled, Respiration_scaled))

cor.test(alpha_resp$Observed_scaled, alpha_resp$Respiration_scaled, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  alpha_resp$Observed_scaled and alpha_resp$Respiration_scaled
## S = 230, p-value = 0.2629
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3939394
cor.test(alpha_resp$Shannon_scaled, alpha_resp$Respiration_scaled, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  alpha_resp$Shannon_scaled and alpha_resp$Respiration_scaled
## S = 224, p-value = 0.3128
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3575758
cor.test(alpha_resp$InvSimpson_scaled, alpha_resp$Respiration_scaled, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  alpha_resp$InvSimpson_scaled and alpha_resp$Respiration_scaled
## S = 226, p-value = 0.2956
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## -0.369697
cor.test(alpha_resp$Observed_scaled, alpha_resp$Respiration_scaled, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  alpha_resp$Observed_scaled and alpha_resp$Respiration_scaled
## t = -2.7866, df = 8, p-value = 0.02368
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.9234071 -0.1293521
## sample estimates:
##        cor 
## -0.7018198
cor.test(alpha_resp$Shannon_scaled, alpha_resp$Respiration_scaled, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  alpha_resp$Shannon_scaled and alpha_resp$Respiration_scaled
## t = -2.8622, df = 8, p-value = 0.02108
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.9261458 -0.1479056
## sample estimates:
##        cor 
## -0.7112926
cor.test(alpha_resp$InvSimpson_scaled, alpha_resp$Respiration_scaled, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  alpha_resp$InvSimpson_scaled and alpha_resp$Respiration_scaled
## t = -2.2589, df = 8, p-value = 0.05381
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.900037185  0.009178016
## sample estimates:
##        cor 
## -0.6240545

0.8 Export all phyloseq object in one table

В табличке есть колонка cluster - “exl” в ней обозначает что это та часть датасета которая не пошла в WGCNA - мажоры в единичных семплах.

ps.m <- phyloseq::psmelt(ps.f)

ps.m <- ps.m %>% 
  mutate_if(is.character, as.factor) 

ps.data.out <- ps.m %>%
  select(-Group) %>% 
  pivot_wider(names_from = c(Day, Description, Sample), values_from = Abundance, values_fill = 0) 

#create empty dataframe with columnnames
external_empty_dataframe <- data.frame(OTU=factor(), cluster=factor(), stringsAsFactors = FALSE)

for (i in names(l_vst)) {
  a <- taxa_names(l_vst[[i]][["ps"]])
  b <- rep(i, length(a))
  d <- data.frame(OTU = as.factor(a),
                  cluster = as.factor(b))
  external_empty_dataframe <- rbind(external_empty_dataframe, d)
}

clusters.otu.df <- external_empty_dataframe
# add exl taxa -- taxa "exl"
d <- data.frame(OTU = as.factor(ps.exl.taxa),
                cluster = as.factor(rep("exl", length(ps.exl.taxa))))

clusters.otu.df <- rbind(clusters.otu.df, d)
ps.data.out.exl <- left_join(clusters.otu.df, ps.data.out, by="OTU")
# write.table(ps.data.out.exl, file = "ps.data.out.tsv", sep = "\t")
ps.data.out.exl

0.8.1 Несколько картинок по динамике предсталенности отдельных фил

Bdellovibrionota - хищники, индикатор развитого сообщества

ps.m %>% 
  filter(Phylum == "Bdellovibrionota") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()
## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

Myxococcota - вроде бы тоже, но как нам известно могут быть целлулотитиками

ps.m %>% 
  filter(Phylum == "Myxococcota") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()
## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

Археи появляются тоже на поздних стадиях - вообще я бы хотел бы опять развить тему важности азотного метаболизма на поздних стадиях разложения.
Оч хочется метагеном, но не этот.
Кроме того хочется отметить, что эти минорные группы возникают на D12

ps.m %>% 
  filter(Phylum == "Crenarchaeota") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()
## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

Gammaproteobacteria - они треть кластера blue

ps.m %>% 
  filter(Class == "Gammaproteobacteria") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()
## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

0.8.2 Боксплоты по филам

Все филы - представлененось логорифмирована по основанию 2
Отдельные точки - суммы абсолютных значений ридов по дням
Логорифмированы уже суммы, а не отдельные филотипы

#select only major phylums
top_phylum <- ps.m %>% 
  count(Phylum) %>% 
  arrange(desc(n)) %>% 
  top_n(10) %>% 
  pull(Phylum)
## Selecting by n
ps.m %>% 
  filter(Phylum %in% top_phylum) %>% 
  mutate(
    Phylum = as.character(Phylum),
    Class = as.character(Class),
    phylum = ifelse(Phylum == "Proteobacteria", Class, Phylum)
  ) %>% 
  group_by(Description, Day, phylum) %>% 
  filter(!is.na(phylum)) %>% 
  summarise(Bs = log2(sum(Abundance))) %>% 
  ggplot(aes(x = Day, y = Bs)) +
  geom_boxplot(fill="#4DBBD5B2", alpha=0.4) +
  theme_bw() +
  facet_wrap(~ phylum)
## `summarise()` has grouped output by 'Description', 'Day'. You can override
## using the `.groups` argument.
## Warning: Removed 46 rows containing non-finite values (stat_boxplot).

0.8.3 resperation and phylotypes correlations

little_res <- resp_median_bags %>% 
  filter(day %in% ps.varstab@sam_data$Day)

ps.varstab.merged <- phyloseq::merge_samples(ps.varstab, "Day")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
otus_cor <- ps.varstab.merged@otu_table %>% 
  as.data.frame()

s1 <- sapply(colnames(otus_cor), function(x) {
  cor <- cor.test(otus_cor[[x]], little_res$rel, method="spearman", exact = FALSE)
  c1 <- cor$p.value
  c2 <- cor$estimate %>% unname()
  return(list("p.value"=c1, "rho"=c2))})

s1 <- s1 %>% 
  t() %>% 
  as.data.frame() %>% 
  unnest(cols = c(p.value, rho), .id = "id") %>% 
  column_to_rownames("id") %>% 
  mutate(p.adj = p.adjust(p.value, method = "BH", n = length(p.value)))
## Warning: The `.id` argument of `unnest()` is deprecated as of tidyr 1.0.0.
## Manually create column of names instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
taxa <- as.data.frame(ps.varstab@tax_table@.Data) %>% 
  rownames_to_column("ID")

s1 %>%
  rownames_to_column("ID") %>% 
  left_join(taxa) %>% 
  arrange(rho) %>%
  filter(Phylum %in% c("Proteobacteria", "Acidobacteriota", "Actinobacteriota","Chloroflexi", "Planctomycetota", "Bacteroidota", "Verrucomicrobiota", "Firmicutes")) %>% 
  mutate(Rank = ifelse(Phylum == "Proteobacteria", Class, Phylum)) %>%
  ggplot(aes(x=rho, color=Rank, fill=Rank)) +
  geom_histogram(alpha=0.3, bins = 30) +
  theme_minimal() +
  facet_wrap(~ Rank) +
  theme(legend.position="none")
## Joining, by = "ID"

0.9 ITS

library(DESeq2)

ps_its <- readRDS("../../its/d2/ps_its")

sample.data <- ps_its@sam_data %>% 
  data.frame() %>% 
  mutate(Group = if_else(Day %in% c("D01", "D03", "D05"), "early", 
                         if_else(Day %in% c("D07", "D08","D10"), "middle", "late"))) %>% 
  rename('Day' = 'Bag') %>% 
  mutate(Group = factor(Group, levels=c("early", "middle","late"))) %>% 
  mutate(Day = Bag %>% 
           forcats::fct_recode( "3" = "D01",
                                "14" = "D03",
                                "28" = "D05",
                                "49" = "D07" ,
                                "63" = "D08",
                                "91" = "D10",
                                "119" = "D12",
                                "140" = "D13",
                                "161" = "D14",
                                "182" = "D15")
         ) %>% 
  phyloseq::sample_data()

sample_data(ps_its) <- sample.data

amp_its <- phyloseq_to_ampvis2(ps_its)

ps.its.inall <- phyloseq::filter_taxa(ps_its, function(x) sum(x > 10) > (0.1*length(x)), TRUE)
ps.its.inall <- prune_taxa(taxa_sums(ps.its.inall) > 0, ps.its.inall)
Biostrings::writeXStringSet(ps.its.inall@refseq, file="ref_inall.fasta")
Biostrings::writeXStringSet(ps_its@refseq, file="ref.fasta")

tree <- ape::read.tree("../../its/d2/al.fasta.contree")

ps.its.inall@phy_tree <- tree
ps.its.inall  <- prune_taxa(taxa_sums(ps.its.inall) > 0, ps.its.inall)


ps.its.exl  <- phyloseq::filter_taxa(ps_its, function(x) sum(x > 10) < (0.1*length(x)), TRUE)
ps.its.exl  <- prune_taxa(taxa_sums(ps.its.exl) > 100, ps.its.exl)
ps.its.exl.taxa <- taxa_names(ps.its.exl)

amp_its_inall <- phyloseq_to_ampvis2(ps.its.inall)

Общая статистика по its

amp_its
## ampvis2 object with 4 elements. 
## Summary of OTU table:
##      Samples         OTUs  Total#Reads    Min#Reads    Max#Reads Median#Reads 
##           36         1295       815134        13533        34994        22657 
##    Avg#Reads 
##     22642.61 
## 
## Assigned taxonomy:
##     Kingdom      Phylum       Class       Order      Family       Genus 
##    554(43%) 453(34.98%) 362(27.95%) 335(25.87%) 321(24.79%) 306(23.63%) 
##     Species 
## 207(15.98%) 
## 
## Metadata variables: 5 
##  SampleID, Bag, Description, Group, Day

Мажорные its филотипы

amp_heatmap(amp_its,
            tax_show = 10,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

Филы по всем

amp_heatmap(amp_its,
            tax_show = 7,
            group_by = "Day",
            tax_aggregate = "Phylum",
            tax_add = "Kingdom",
            normalise=TRUE,
            showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

Те, которые пойдут в WGCNA(116 филотипов)

amp_heatmap(amp_its_inall,
            tax_show = 40 ,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

beta_custom_norm_NMDS_elli_w(ps_its, Group="Day", Color="Day")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1834317 
## Run 1 stress 0.1834317 
## ... Procrustes: rmse 0.0000201183  max resid 0.00005501659 
## ... Similar to previous best
## Run 2 stress 0.1841584 
## Run 3 stress 0.1834317 
## ... New best solution
## ... Procrustes: rmse 0.000009091179  max resid 0.00003505055 
## ... Similar to previous best
## Run 4 stress 0.1841584 
## Run 5 stress 0.1834317 
## ... New best solution
## ... Procrustes: rmse 0.00002509501  max resid 0.00007022262 
## ... Similar to previous best
## Run 6 stress 0.1834317 
## ... Procrustes: rmse 0.000003875879  max resid 0.00001214466 
## ... Similar to previous best
## Run 7 stress 0.1834317 
## ... Procrustes: rmse 0.000006013033  max resid 0.00002521067 
## ... Similar to previous best
## Run 8 stress 0.1841585 
## Run 9 stress 0.1834317 
## ... Procrustes: rmse 0.000005395813  max resid 0.00001721888 
## ... Similar to previous best
## Run 10 stress 0.1841584 
## Run 11 stress 0.2180006 
## Run 12 stress 0.1834317 
## ... Procrustes: rmse 0.0000189146  max resid 0.00008526694 
## ... Similar to previous best
## Run 13 stress 0.2171479 
## Run 14 stress 0.1841584 
## Run 15 stress 0.2097497 
## Run 16 stress 0.1834317 
## ... Procrustes: rmse 0.0000264768  max resid 0.00007101285 
## ... Similar to previous best
## Run 17 stress 0.1893642 
## Run 18 stress 0.1834317 
## ... Procrustes: rmse 0.000009293648  max resid 0.00002426741 
## ... Similar to previous best
## Run 19 stress 0.1834317 
## ... Procrustes: rmse 0.000008230916  max resid 0.00002983058 
## ... Similar to previous best
## Run 20 stress 0.1834317 
## ... Procrustes: rmse 0.00002722606  max resid 0.00008598357 
## ... Similar to previous best
## *** Solution reached

Первый плот - clr нормализация (композиционная), второй - vst(DESeq2). В отличие от бактерий - для vst WGCNA не показал хороших результатов (нет выхода на плато).
Мы всё равно применили vst стабилизацию, но нужно держать в голове, что получившийся результат не очень.

otu.inall <- as.data.frame(ps.its.inall@otu_table@.Data) 
ps.inall.clr <- ps.its.inall
otu_table(ps.inall.clr) <- phyloseq::otu_table(compositions::clr(otu.inall), taxa_are_rows = FALSE)
data <- ps.inall.clr@otu_table@.Data %>% 
  as.data.frame()
rownames(data) <- as.character(ps.inall.clr@sam_data$Description)
powers <-  c(c(1:10), seq(from = 12, to=30, by=1))
sft <-  pickSoftThreshold(data, powerVector = powers, verbose = 5, networkType = "signed")
## pickSoftThreshold: will use block size 116.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 116 of 116
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.3860 -25.70         0.5550 57.2000 57.100000 59.600
## 2      2   0.4580 -17.60         0.3190 29.5000 29.300000 32.400
## 3      3   0.4860 -11.60         0.3420 15.8000 15.600000 18.500
## 4      4   0.6370  -7.48         0.5340  8.7100  8.530000 11.300
## 5      5   0.7900  -5.30         0.7300  4.9900  4.780000  7.370
## 6      6   0.8050  -3.89         0.7500  2.9600  2.770000  5.090
## 7      7   0.8400  -2.94         0.8030  1.8300  1.650000  3.710
## 8      8   0.7890  -2.65         0.7530  1.1700  1.000000  2.820
## 9      9   0.2080  -7.17        -0.0165  0.7790  0.626000  2.230
## 10    10   0.8000  -1.88         0.8250  0.5400  0.401000  1.810
## 11    12   0.8610  -1.51         0.9500  0.2890  0.173000  1.360
## 12    13   0.7990  -1.44         0.8150  0.2230  0.121000  1.220
## 13    14   0.8000  -1.34         0.8260  0.1770  0.083400  1.120
## 14    15   0.7480  -1.32         0.7690  0.1440  0.057500  1.020
## 15    16   0.1540  -2.22        -0.0813  0.1190  0.040100  0.941
## 16    17   0.1530  -2.10        -0.0816  0.1010  0.027800  0.869
## 17    18   0.8570  -1.15         0.8780  0.0866  0.019400  0.805
## 18    19   0.0897  -1.46        -0.0687  0.0754  0.013700  0.746
## 19    20   0.0930  -1.43        -0.0720  0.0663  0.009910  0.693
## 20    21   0.2140  -2.01         0.0509  0.0589  0.007180  0.660
## 21    22   0.2200  -2.04         0.0648  0.0527  0.005230  0.643
## 22    23   0.0951  -1.76        -0.0800  0.0475  0.003800  0.627
## 23    24   0.1930  -1.80        -0.0248  0.0431  0.002730  0.612
## 24    25   0.1960  -1.82        -0.0191  0.0393  0.001970  0.598
## 25    26   0.2010  -2.33        -0.0126  0.0360  0.001440  0.585
## 26    27   0.2220  -2.37         0.0609  0.0331  0.001070  0.572
## 27    28   0.2290  -2.40         0.0789  0.0305  0.000788  0.559
## 28    29   0.2320  -2.67         0.0387  0.0283  0.000584  0.547
## 29    30   0.2430  -2.73         0.0567  0.0263  0.000433  0.536
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

ps.varstab <- ps_vst(ps.its.inall, "Day")

data2 <- ps.varstab@otu_table@.Data %>% 
  as.data.frame()

rownames(data2) <- as.character(ps.varstab@sam_data$Description)
powers <-  c(seq(from = 1, to=10, by=0.5), seq(from = 11, to=20, by=1))
sft2 <-  pickSoftThreshold(data2, powerVector = powers, verbose = 5, networkType = "signed hybrid")
## pickSoftThreshold: will use block size 116.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 116 of 116
##    Power SFT.R.sq  slope truncated.R.sq mean.k.  median.k. max.k.
## 1    1.0    0.352 -1.180        0.82400 11.7000 11.1000000  21.40
## 2    1.5    0.511 -0.965        0.80800  6.4500  5.9700000  13.20
## 3    2.0    0.688 -1.020        0.82800  3.9000  3.3000000   9.04
## 4    2.5    0.787 -0.915        0.80900  2.5200  1.9300000   6.41
## 5    3.0    0.828 -0.797        0.78200  1.7200  1.1700000   4.69
## 6    3.5    0.807 -0.913        0.75900  1.2300  0.7570000   3.81
## 7    4.0    0.789 -1.110        0.73200  0.9160  0.5010000   3.54
## 8    4.5    0.200 -2.790        0.08170  0.7060  0.3370000   3.34
## 9    5.0    0.218 -2.720        0.11300  0.5610  0.2300000   3.18
## 10   5.5    0.235 -3.520        0.08280  0.4570  0.1590000   3.04
## 11   6.0    0.165 -2.140       -0.07370  0.3800  0.1100000   2.92
## 12   6.5    0.204 -2.250       -0.00278  0.3220  0.0758000   2.82
## 13   7.0    0.208 -2.210        0.00364  0.2780  0.0538000   2.73
## 14   7.5    0.192 -2.760       -0.03940  0.2430  0.0391000   2.65
## 15   8.0    0.192 -2.670       -0.03910  0.2150  0.0280000   2.57
## 16   8.5    0.196 -2.600       -0.03270  0.1920  0.0203000   2.50
## 17   9.0    0.228 -2.770        0.01890  0.1740  0.0148000   2.44
## 18   9.5    0.213 -2.990       -0.00815  0.1580  0.0108000   2.37
## 19  10.0    0.214 -3.060       -0.00549  0.1450  0.0078000   2.32
## 20  11.0    0.230 -3.170        0.01010  0.1240  0.0041500   2.21
## 21  12.0    0.262 -3.210        0.05580  0.1080  0.0024100   2.12
## 22  13.0    0.222 -3.130        0.01010  0.0955  0.0014200   2.03
## 23  14.0    0.234 -3.060        0.02160  0.0855  0.0008090   1.95
## 24  15.0    0.259 -3.140        0.04770  0.0774  0.0004490   1.88
## 25  16.0    0.270 -3.070        0.06090  0.0707  0.0002500   1.82
## 26  17.0    0.233 -2.840        0.04610  0.0651  0.0001400   1.76
## 27  18.0    0.244 -2.790        0.05240  0.0602  0.0000789   1.71
## 28  19.0    0.251 -2.860        0.06040  0.0561  0.0000444   1.66
## 29  20.0    0.280 -3.030        0.08580  0.0525  0.0000250   1.62
plot(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

# WGCNA, as old and not supported
detachAllPackages()
library(WGCNA)

net3 <- WGCNA::blockwiseModules(data2,
                          power=3,
                          TOMType="signed",
                          networkType="signed hybrid",
                          nThreads=0)


mergedColors2 <-  WGCNA::labels2colors(net3$colors, colorSeq = c("salmon", "darkgreen", "cyan", "red", "blue", "plum"))

plotDendroAndColors(
  net3$dendrograms[[1]],
  mergedColors2[net3$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05)

library(phyloseq)
library(tidyverse)
library(ggpubr)
library(ampvis2)
library(heatmaply)
library(WGCNA)
library(phyloseq)
library(ggtree)
library(tidyverse)
library(KneeArrower)

Зеленые - фон, что в случае с кластером red для бактерий, что здесь, я не думаю что это вообще можно называть кластером.
WGCNA как анализ помогает кластеризовывать на основе корреляций при помощи поиска мягкого порога(power, см плот где красные циферки всё никак не могут выйти на плато),
кластер darkgreen и red просто не объединенные филотипы.

modules_of_interest = mergedColors2 %>% 
  unique()

module_df <- data.frame(
  asv = names(net3$colors),
  colors = mergedColors2
)

# module_df[module_df == "yellow"] <- "salmon"

submod <-  module_df %>%
  subset(colors %in% modules_of_interest)

row.names(module_df) = module_df$asv

subexpr = as.data.frame(t(data2))[submod$asv,]

submod_df <- data.frame(subexpr) %>%
  mutate(
    asv = row.names(.)
  ) %>%
  pivot_longer(-asv) %>%
  mutate(
    module = module_df[asv,]$colors
  )

submod_df <- submod_df %>% 
  mutate(name = gsub("\\_.*","",submod_df$name)) %>% 
  group_by(name, asv) %>% 
  summarise(value = mean(value), asv = asv, module = module) %>% 
  relocate(c(asv, name, value, module)) %>% 
  ungroup() %>% 
  mutate(module = as.factor(module))

p <- submod_df %>% 
  ggplot(., aes(x=name, y=value, group=asv)) +
  geom_line(aes(color = module),
            alpha = 0.2) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none") +
  facet_grid(rows = vars(module)) +
  labs(x = "treatment",
       y = "normalized expression")

p + scale_color_manual(values = levels(submod_df$module))

l_its <- color_filt_broken(ps_its, submod_df, ps.its.inall)
l_its

$cyan \(cyan\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 36 taxa and 36 samples ] sample_data() Sample Data: [ 36 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 36 taxa by 7 taxonomic ranks ] refseq() DNAStringSet: [ 36 reference sequences ]

\(cyan\)amp ampvis2 object with 4 elements. Summary of OTU table: Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads 36 36 318660 0 23584 9007.5 Avg#Reads 8851.67

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 28(78%) 25(69.44%) 17(47.22%) 12(33.33%) 12(33.33%) 12(33.33%) 9(25%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(cyan\)heat \(cyan\)heat_rel \(cyan\)tree \(cyan\)taxa

Kingdom Phylum Class Order Family Genus Species
Seq1 k__Fungi p__Ascomycota c__Sordariomycetes o__Chaetosphaeriales f__Chaetosphaeriaceae g__Chloridium s__aseptatum
Seq2 k__Fungi p__Ascomycota c__Sordariomycetes o__Coniochaetales f__Coniochaetaceae g__Coniochaeta s__verticillata
Seq12 k__Fungi p__Ascomycota c__Sordariomycetes NA NA NA NA
Seq184 NA NA NA NA NA NA NA
Seq10 k__Fungi p__Ascomycota c__Sordariomycetes o__Coniochaetales f__Coniochaetaceae g__Coniochaeta s__canina
Seq73 k__Alveolata NA NA NA NA NA NA
Seq5 k__Fungi p__Ascomycota c__Sordariomycetes o__Coniochaetales f__Coniochaetaceae g__Coniochaeta s__verticillata
Seq118 k__Metazoa p__Nematoda NA NA NA NA NA
Seq18 k__Fungi p__Ascomycota c__Sordariomycetes NA NA NA NA
Seq93 k__Fungi p__Ascomycota c__Dothideomycetes o__Venturiales f__Sympoventuriaceae g__Scolecobasidium s__constrictum
Seq78 k__Metazoa p__Nematoda NA NA NA NA NA
Seq101 NA NA NA NA NA NA NA
Seq4 k__Fungi p__Ascomycota c__Sordariomycetes o__Coniochaetales f__Coniochaetaceae g__Coniochaeta s__verticillata
Seq62 k__Metazoa p__Nematoda NA NA NA NA NA
Seq48 k__Metazoa p__Nematoda NA NA NA NA NA
Seq36 k__Fungi p__Ascomycota c__Sordariomycetes NA NA NA NA
Seq44 k__Metazoa p__Nematoda NA NA NA NA NA
Seq103 k__Metazoa p__Nematoda NA NA NA NA NA
Seq166 k__Fungi p__Ascomycota c__Eurotiomycetes o__Chaetothyriales f__Herpotrichiellaceae g__Exophiala NA
Seq231 k__Eukaryota_kgd_Incertae_sedis NA NA NA NA NA NA
Seq21 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Clavicipitaceae g__Metarhizium s__marquandii
Seq108 k__Metazoa p__Nematoda NA NA NA NA NA
Seq74 k__Fungi p__Ascomycota c__Sordariomycetes NA NA NA NA
Seq195 NA NA NA NA NA NA NA
Seq28 k__Fungi p__Ascomycota c__Leotiomycetes o__Helotiales f__Helotiaceae g__Scytalidium NA
Seq104 k__Metazoa p__Nematoda c__Chromadorea o__Rhabditida f__Cephalobidae g__Pseudacrobeles NA
Seq29 k__Fungi p__Ascomycota c__Sordariomycetes o__Glomerellales f__Glomerellaceae g__Colletotrichum s__sidae
Seq46 k__Fungi p__Ascomycota c__Sordariomycetes NA NA NA NA
Seq122 NA NA NA NA NA NA NA
Seq235 NA NA NA NA NA NA NA
Seq191 NA NA NA NA NA NA NA
Seq94 k__Alveolata NA NA NA NA NA NA
Seq76 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Sordariales_fam_Incertae_sedis g__Staphylotrichum s__boninense
Seq155 NA NA NA NA NA NA NA
Seq190 NA NA NA NA NA NA NA
Seq32 k__Metazoa p__Nematoda NA NA NA NA NA

$darkgreen \(darkgreen\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 56 taxa and 36 samples ] sample_data() Sample Data: [ 36 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 56 taxa by 7 taxonomic ranks ] refseq() DNAStringSet: [ 56 reference sequences ]

\(darkgreen\)amp ampvis2 object with 4 elements. Summary of OTU table: Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads 36 56 261584 1790 20315 6577 Avg#Reads 7266.22

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 52(93%) 51(91.07%) 48(85.71%) 48(85.71%) 48(85.71%) 48(85.71%) 34(60.71%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(darkgreen\)heat \(darkgreen\)heat_rel \(darkgreen\)tree \(darkgreen\)taxa

Kingdom Phylum Class Order Family Genus Species
Seq92 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__zeylanica
Seq49 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__zeylanica
Seq91 k__Fungi p__Ascomycota c__Sordariomycetes o__Coniochaetales f__Coniochaetaceae g__Coniochaeta s__canina
Seq6 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Chaetomiaceae g__Humicola s__sardiniae
Seq16 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__zeylanica
Seq67 k__Metazoa p__Annelida c__Clitellata o__Enchytraeida f__Enchytraeidae g__Fridericia NA
Seq13 k__Fungi p__Ascomycota c__Eurotiomycetes o__Eurotiales f__Trichocomaceae g__Talaromyces NA
Seq7 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__zeylanica
Seq25 k__Fungi p__Basidiomycota c__Cystobasidiomycetes o__Cystobasidiales f__Cystobasidiaceae g__Occultifur NA
Seq69 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Gibberella s__intricans
Seq86 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__zeylanica
Seq205 k__Fungi p__Mucoromycota c__Mucoromycetes o__Mucorales f__Rhizopodaceae g__Rhizopus s__arrhizus
Seq50 k__Fungi p__Basidiomycota c__Cystobasidiomycetes o__Cystobasidiales f__Cystobasidiaceae g__Occultifur NA
Seq193 k__Heterolobosa p__Heterolobosa_phy_Incertae_sedis c__Heterolobosea o__Schizopyrenida f__Vahlkampfiidae g__Naegleria NA
Seq136 k__Fungi p__Ascomycota c__Eurotiomycetes o__Chaetothyriales f__Herpotrichiellaceae g__Exophiala NA
Seq23 k__Fungi p__Mucoromycota c__Mucoromycetes o__Mucorales f__Mucoraceae g__Actinomucor s__elegans
Seq175 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Bionectriaceae g__Clonostachys s__rosea
Seq281 k__Heterolobosa p__Heterolobosa_phy_Incertae_sedis c__Heterolobosea o__Schizopyrenida f__Vahlkampfiidae g__Allovahlkampfia NA
Seq47 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Gibberella s__intricans
Seq137 k__Fungi p__Ascomycota c__Dothideomycetes o__Venturiales f__Sympoventuriaceae g__Ochroconis s__tshawytschae
Seq9 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Stachybotryaceae g__Albifimbria s__verrucaria
Seq42 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__zeylanica
Seq45 k__Fungi NA NA NA NA NA NA
Seq11 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Gibberella s__intricans
Seq89 k__Fungi p__Basidiomycota c__Cystobasidiomycetes o__Cystobasidiales f__Cystobasidiaceae g__Occultifur NA
Seq171 k__Fungi p__Ascomycota c__Dothideomycetes o__Venturiales f__Sympoventuriaceae g__Ochroconis s__tshawytschae
Seq178 k__Fungi p__Ascomycota c__Dothideomycetes o__Venturiales f__Sympoventuriaceae g__Scolecobasidium s__constrictum
Seq22 k__Fungi p__Ascomycota c__Sordariomycetes o__Coniochaetales f__Coniochaetaceae g__Coniochaeta s__verticillata
Seq31 k__Fungi p__Mucoromycota c__Mucoromycetes o__Mucorales f__Mucoraceae g__Actinomucor s__elegans
Seq65 k__Fungi p__Ascomycota c__Sordariomycetes o__Coniochaetales f__Coniochaetaceae g__Coniochaeta NA
Seq75 NA NA NA NA NA NA NA
Seq85 k__Metazoa p__Nematoda NA NA NA NA NA
Seq54 k__Fungi p__Ascomycota c__Sordariomycetes o__Coniochaetales f__Coniochaetaceae g__Coniochaeta NA
Seq121 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Gibberella s__intricans
Seq125 k__Fungi p__Ascomycota c__Dothideomycetes o__Venturiales f__Sympoventuriaceae g__Ochroconis s__tshawytschae
Seq181 k__Fungi p__Ascomycota c__Eurotiomycetes o__Eurotiales f__Aspergillaceae g__Penicillium s__bialowiezense
Seq24 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Gibberella s__intricans
Seq27 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Stachybotryaceae g__Stachybotrys s__chartarum
Seq66 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Stachybotryaceae g__Albifimbria s__verrucaria
Seq114 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__zeylanica
Seq15 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Gibberella s__intricans
Seq226 k__Fungi p__Ascomycota c__Dothideomycetes o__Venturiales f__Sympoventuriaceae g__Scolecobasidium s__constrictum
Seq26 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Chaetomiaceae g__Chaetomium s__iranianum
Seq59 k__Fungi p__Ascomycota c__Dothideomycetes o__Venturiales f__Sympoventuriaceae g__Ochroconis s__tshawytschae
Seq41 k__Fungi p__Basidiomycota c__Agaricomycetes o__Cantharellales f__Ceratobasidiaceae g__Waitea s__circinata
Seq157 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Chaetomiaceae g__Zopfiella NA
Seq124 k__Fungi p__Ascomycota c__Eurotiomycetes o__Eurotiales f__Aspergillaceae g__Aspergillus NA
Seq254 k__Metazoa p__Nematoda NA NA NA NA NA
Seq17 k__Metazoa p__Nematoda NA NA NA NA NA
Seq39 k__Fungi p__Ascomycota c__Sordariomycetes o__Coniochaetales f__Coniochaetaceae g__Coniochaeta s__verticillata
Seq180 NA NA NA NA NA NA NA
Seq20 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Chaetomiaceae g__Chaetomium NA
Seq102 NA NA NA NA NA NA NA
Seq70 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Sordariales_fam_Incertae_sedis g__Staphylotrichum s__boninense
Seq150 k__Heterolobosa p__Heterolobosa_phy_Incertae_sedis c__Heterolobosea o__Schizopyrenida f__Vahlkampfiidae g__Allovahlkampfia NA
Seq132 NA NA NA NA NA NA NA

$salmon \(salmon\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 24 taxa and 36 samples ] sample_data() Sample Data: [ 36 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 24 taxa by 7 taxonomic ranks ] refseq() DNAStringSet: [ 24 reference sequences ]

\(salmon\)amp ampvis2 object with 4 elements. Summary of OTU table: Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads 36 24 121335 20 14955 1772 Avg#Reads 3370.42

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 22(92%) 18(75%) 15(62.5%) 14(58.33%) 13(54.17%) 13(54.17%) 11(45.83%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(salmon\)heat \(salmon\)heat_rel \(salmon\)tree \(salmon\)taxa

Kingdom Phylum Class Order Family Genus Species
Seq3 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Lasiosphaeriaceae g__Schizothecium s__inaequale
Seq19 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Psathyrellaceae g__Coprinellus s__flocculosus
Seq53 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Stachybotryaceae g__Albifimbria s__verrucaria
Seq99 k__Viridiplantae p__Anthophyta NA NA NA NA NA
Seq215 k__Viridiplantae NA NA NA NA NA NA
Seq30 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Chaetomiaceae g__Humicola s__sardiniae
Seq43 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Lasiosphaeriaceae g__Schizothecium NA
Seq68 NA NA NA NA NA NA NA
Seq63 k__Fungi p__Mucoromycota c__Mucoromycetes o__Mucorales f__Mucoraceae g__Actinomucor s__elegans
Seq100 k__Fungi NA NA NA NA NA NA
Seq140 k__Fungi p__Ascomycota c__Dothideomycetes o__Pleosporales NA NA NA
Seq33 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Psathyrellaceae g__Coprinellus s__flocculosus
Seq245 NA NA NA NA NA NA NA
Seq138 k__Viridiplantae p__Anthophyta c__Eudicotyledonae NA NA NA NA
Seq8 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Lasiosphaeriaceae g__Schizothecium s__inaequale
Seq79 k__Viridiplantae p__Anthophyta NA NA NA NA NA
Seq161 k__Eukaryota_kgd_Incertae_sedis NA NA NA NA NA NA
Seq96 k__Metazoa p__Nematoda c__Chromadorea o__Rhabditida f__Cephalobidae g__Acrobeloides s__nanus
Seq147 k__Fungi NA NA NA NA NA NA
Seq123 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Chaetomiaceae g__Chaetomium s__jodhpurense
Seq152 k__Viridiplantae p__Anthophyta NA NA NA NA NA
Seq52 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Lasiosphaeriaceae g__Schizothecium NA
Seq40 k__Fungi p__Mucoromycota c__Mucoromycetes o__Mucorales f__Mucoraceae g__Actinomucor s__elegans
Seq37 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Gibberella s__intricans
p.observed <- plot_alpha_w_toc(ps = ps_its, group = "Group", metric = c("Observed")) + 
  theme(axis.title.y = element_blank())

p.shannon <- plot_alpha_w_toc(ps = ps_its, group = "Group", metric = c("Shannon")) + 
  theme(axis.title.y = element_blank())

p.simpson <- plot_alpha_w_toc(ps = ps_its, group = "Group", metric = c("InvSimpson")) + 
  theme(axis.title.y = element_blank())

ggpubr::ggarrange(p.observed, p.shannon, p.simpson, ncol = 3)

ps_its@sam_data
##                  Bag Description  Group Day
## straw-its2-D01-1 D01       D01_1  early   3
## straw-its2-D01-2 D01       D01_2  early   3
## straw-its2-D01-3 D01       D01_3  early   3
## straw-its2-D03-1 D03       D03_1  early  14
## straw-its2-D03-2 D03       D03_2  early  14
## straw-its2-D03-3 D03       D03_3  early  14
## straw-its2-D05-1 D05       D05_1  early  28
## straw-its2-D05-2 D05       D05_2  early  28
## straw-its2-D05-3 D05       D05_3  early  28
## straw-its2-D05-4 D05       D05_4  early  28
## straw-its2-D05-5 D05       D05_5  early  28
## straw-its2-D07-1 D07       D07_1 middle  49
## straw-its2-D07-2 D07       D07_2 middle  49
## straw-its2-D07-3 D07       D07_3 middle  49
## straw-its2-D08-1 D08       D08_1 middle  63
## straw-its2-D08-2 D08       D08_2 middle  63
## straw-its2-D08-3 D08       D08_3 middle  63
## straw-its2-D10-1 D10       D10_1 middle  91
## straw-its2-D10-2 D10       D10_2 middle  91
## straw-its2-D10-3 D10       D10_3 middle  91
## straw-its2-D10-4 D10       D10_4 middle  91
## straw-its2-D10-5 D10       D10_5 middle  91
## straw-its2-D12-1 D12       D12_1   late 119
## straw-its2-D12-2 D12       D12_2   late 119
## straw-its2-D12-3 D12       D12_3   late 119
## straw-its2-D13-1 D13       D13_1   late 140
## straw-its2-D13-2 D13       D13_2   late 140
## straw-its2-D13-3 D13       D13_3   late 140
## straw-its2-D14-1 D14       D14_1   late 161
## straw-its2-D14-2 D14       D14_2   late 161
## straw-its2-D14-3 D14       D14_3   late 161
## straw-its2-D14-4 D14       D14_4   late 161
## straw-its2-D14-5 D14       D14_5   late 161
## straw-its2-D15-1 D15       D15_1   late 182
## straw-its2-D15-2 D15       D15_2   late 182
## straw-its2-D15-3 D15       D15_3   late 182
p.observed <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = c("Observed")) + 
  theme(axis.title.y = element_blank())

p.shannon <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = c("Shannon")) + 
  theme(axis.title.y = element_blank())

p.simpson <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = c("InvSimpson")) + 
  theme(axis.title.y = element_blank())

ggpubr::ggarrange(p.observed, p.shannon, p.simpson, ncol = 3)

ps.m <- phyloseq::psmelt(ps_its)
ps.m <- ps.m %>% 
  mutate_if(is.character, as.factor) 

ps.data.out <- ps.m %>%
  select(-Group) %>% 
  pivot_wider(names_from = c(Day, Description, Sample), values_from = Abundance, values_fill = 0) 

#create empty dataframe with columnnames
external_empty_dataframe <- data.frame(OTU=factor(), cluster=factor(), stringsAsFactors = FALSE)

for (i in names(l_its)) {
  a <- taxa_names(l_its[[i]][["ps"]])
  b <- rep(i, length(a))
  d <- data.frame(OTU = as.factor(a),
                  cluster = as.factor(b))
  external_empty_dataframe <- rbind(external_empty_dataframe, d)
}

clusters.otu.df <- external_empty_dataframe
# add exl taxa -- taxa "exl"
d <- data.frame(OTU = as.factor(ps.its.exl.taxa),
                cluster = as.factor(rep("exl", length(ps.its.exl.taxa))))

clusters.otu.df <- rbind(clusters.otu.df, d)
ps.data.out.exl <- left_join(clusters.otu.df, ps.data.out, by="OTU")
# write.table(ps.data.out.exl, file = "ps.its.data.out.tsv", sep = "\t")


ps.data.out.exl